From ccdb2d17df9f07c291d43b0aeea7c90e4c020489 Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Sat, 15 Jul 2017 19:40:20 +0200
Subject: ip6: fix PMTU discovery when using /127 subnets

The definition of an "anycast destination address" has been tweaked as a
side-effect of commit 2647a9b07032 ("ipv6: Remove external dependency on
rt6i_gateway and RTF_ANYCAST"). The first address of a point-to-point
/127 subnet is now considered as an anycast address. This prevents
ICMPv6 errors to be returned to a sender of such a subnet and breaks
PMTU discovery.

This can be reproduced with:

    ip link add name out6 type veth peer name in6
    ip link add name out7 type veth peer name in7
    ip link set mtu 1400 dev out7
    ip link set mtu 1400 dev in7
    ip netns add next-hop
    ip netns add next-next-hop
    ip link set netns next-hop dev in6
    ip link set netns next-hop dev out7
    ip link set netns next-next-hop dev in7
    ip link set up dev out6
    ip addr add 2001:db8:1::12/127 dev out6
    ip netns exec next-hop ip link set up dev in6
    ip netns exec next-hop ip link set up dev out7
    ip netns exec next-hop ip addr add 2001:db8:1::13/127 dev in6
    ip netns exec next-hop ip addr add 2001:db8:1::14/127 dev out7
    ip netns exec next-hop ip route add default via 2001:db8:1::15
    ip netns exec next-hop sysctl -qw net.ipv6.conf.all.forwarding=1
    ip netns exec next-next-hop ip link set up dev in7
    ip netns exec next-next-hop ip addr add 2001:db8:1::15/127 dev in7
    ip netns exec next-next-hop ip addr add 2001:db8:1::50/128 dev in7
    ip netns exec next-next-hop ip route add default via 2001:db8:1::14
    ip netns exec next-next-hop sysctl -qw net.ipv6.conf.all.forwarding=1
    ip route add 2001:db8:1::48/123 via 2001:db8:1::13
    sleep 4
    ping -M do -s 1452 -c 3 2001:db8:1::50 || true
    ip route get 2001:db8:1::50

Before the patch, we get:

    2001:db8:1::50 from :: via 2001:db8:1::13 dev out6 src 2001:db8:1::12 metric 1024  pref medium

After the patch, we get:

    2001:db8:1::50 via 2001:db8:1::13 dev out6 src 2001:db8:1::12 metric 0
        cache  expires 578sec mtu 1400 pref medium

Fixes: 2647a9b07032 ("ipv6: Remove external dependency on rt6i_gateway and RTF_ANYCAST")
Signed-off-by: Vincent Bernat <vincent@bernat.im>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 199056933dcb..907d39a42f6b 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -194,7 +194,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
 	struct rt6_info *rt = (struct rt6_info *)dst;
 
 	return rt->rt6i_flags & RTF_ANYCAST ||
-		(rt->rt6i_dst.plen != 128 &&
+		(rt->rt6i_dst.plen < 127 &&
 		 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
 }
 
-- 
cgit v1.2.3


From b02db702face3791889a4fcf06691c086648ee89 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 17 Jul 2017 11:29:57 +0800
Subject: sctp: remove the typedef sctp_random_param_t

This patch is to remove the typedef sctp_random_param_t, and
replace with struct sctp_random_param in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h       | 4 ++--
 include/net/sctp/structs.h | 2 +-
 net/sctp/auth.c            | 9 ++++-----
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 76245685f923..9b1aa3907c9e 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -315,10 +315,10 @@ struct sctp_supported_ext_param {
 };
 
 /* AUTH Section 3.1 Random */
-typedef struct sctp_random_param {
+struct sctp_random_param {
 	struct sctp_paramhdr param_hdr;
 	__u8 random_val[0];
-} sctp_random_param_t;
+};
 
 /* AUTH Section 3.2 Chunk List */
 typedef struct sctp_chunks_param {
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 5ab29af8ca8a..f22c079fd1f1 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1556,7 +1556,7 @@ struct sctp_association {
 		 * and authenticated chunk list.  All that is part of the
 		 * cookie and these are just pointers to those locations
 		 */
-		sctp_random_param_t *peer_random;
+		struct sctp_random_param *peer_random;
 		sctp_chunks_param_t *peer_chunks;
 		sctp_hmac_algo_param_t *peer_hmacs;
 	} peer;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index e001b01b0e68..0d9c63eba978 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -185,7 +185,7 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1,
  *    are called the two key vectors.
  */
 static struct sctp_auth_bytes *sctp_auth_make_key_vector(
-			sctp_random_param_t *random,
+			struct sctp_random_param *random,
 			sctp_chunks_param_t *chunks,
 			sctp_hmac_algo_param_t *hmacs,
 			gfp_t gfp)
@@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector(
 				    gfp_t gfp)
 {
 	return sctp_auth_make_key_vector(
-				    (sctp_random_param_t *)asoc->c.auth_random,
-				    (sctp_chunks_param_t *)asoc->c.auth_chunks,
-				    (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs,
-				    gfp);
+			(struct sctp_random_param *)asoc->c.auth_random,
+			(sctp_chunks_param_t *)asoc->c.auth_chunks,
+			(sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, gfp);
 }
 
 /* Make a key vector based on peer's parameters */
-- 
cgit v1.2.3


From a762a9d94d44980e3690f9de87b918376daa6428 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 17 Jul 2017 11:29:58 +0800
Subject: sctp: remove the typedef sctp_chunks_param_t

This patch is to remove the typedef sctp_chunks_param_t, and
replace with struct sctp_chunks_param in the places where it's
using this typedef.

It is also to use sizeof(variable) instead of sizeof(type).

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h       | 4 ++--
 include/net/sctp/structs.h | 2 +-
 net/sctp/auth.c            | 4 ++--
 net/sctp/endpointola.c     | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 9b1aa3907c9e..b52def9bcfa1 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -321,10 +321,10 @@ struct sctp_random_param {
 };
 
 /* AUTH Section 3.2 Chunk List */
-typedef struct sctp_chunks_param {
+struct sctp_chunks_param {
 	struct sctp_paramhdr param_hdr;
 	__u8 chunks[0];
-} sctp_chunks_param_t;
+};
 
 /* AUTH Section 3.3 HMAC Algorithm */
 typedef struct sctp_hmac_algo_param {
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index f22c079fd1f1..8042e6380b0e 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1557,7 +1557,7 @@ struct sctp_association {
 		 * cookie and these are just pointers to those locations
 		 */
 		struct sctp_random_param *peer_random;
-		sctp_chunks_param_t *peer_chunks;
+		struct sctp_chunks_param *peer_chunks;
 		sctp_hmac_algo_param_t *peer_hmacs;
 	} peer;
 
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 0d9c63eba978..367994d9712a 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -186,7 +186,7 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1,
  */
 static struct sctp_auth_bytes *sctp_auth_make_key_vector(
 			struct sctp_random_param *random,
-			sctp_chunks_param_t *chunks,
+			struct sctp_chunks_param *chunks,
 			sctp_hmac_algo_param_t *hmacs,
 			gfp_t gfp)
 {
@@ -227,7 +227,7 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector(
 {
 	return sctp_auth_make_key_vector(
 			(struct sctp_random_param *)asoc->c.auth_random,
-			(sctp_chunks_param_t *)asoc->c.auth_chunks,
+			(struct sctp_chunks_param *)asoc->c.auth_chunks,
 			(sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, gfp);
 }
 
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 0e86f988f836..35bf5af124fc 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -78,8 +78,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 		if (!auth_hmacs)
 			goto nomem;
 
-		auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) +
-					SCTP_NUM_CHUNK_TYPES, gfp);
+		auth_chunks = kzalloc(sizeof(*auth_chunks) +
+				      SCTP_NUM_CHUNK_TYPES, gfp);
 		if (!auth_chunks)
 			goto nomem;
 
-- 
cgit v1.2.3


From 1474774a7f0daf9878fd9537a24714f419e744ed Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 17 Jul 2017 11:29:59 +0800
Subject: sctp: remove the typedef sctp_hmac_algo_param_t

This patch is to remove the typedef sctp_hmac_algo_param_t, and
replace with struct sctp_hmac_algo_param in the places where it's
using this typedef.

It is also to use sizeof(variable) instead of sizeof(type).

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h       | 4 ++--
 include/net/sctp/structs.h | 2 +-
 net/sctp/auth.c            | 4 ++--
 net/sctp/endpointola.c     | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index b52def9bcfa1..913474dfc96c 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -327,10 +327,10 @@ struct sctp_chunks_param {
 };
 
 /* AUTH Section 3.3 HMAC Algorithm */
-typedef struct sctp_hmac_algo_param {
+struct sctp_hmac_algo_param {
 	struct sctp_paramhdr param_hdr;
 	__be16 hmac_ids[0];
-} sctp_hmac_algo_param_t;
+};
 
 /* RFC 2960.  Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2):
  *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 8042e6380b0e..66cd7639b912 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1558,7 +1558,7 @@ struct sctp_association {
 		 */
 		struct sctp_random_param *peer_random;
 		struct sctp_chunks_param *peer_chunks;
-		sctp_hmac_algo_param_t *peer_hmacs;
+		struct sctp_hmac_algo_param *peer_hmacs;
 	} peer;
 
 	/* State       : A state variable indicating what state the
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 367994d9712a..00667c50efa7 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -187,7 +187,7 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1,
 static struct sctp_auth_bytes *sctp_auth_make_key_vector(
 			struct sctp_random_param *random,
 			struct sctp_chunks_param *chunks,
-			sctp_hmac_algo_param_t *hmacs,
+			struct sctp_hmac_algo_param *hmacs,
 			gfp_t gfp)
 {
 	struct sctp_auth_bytes *new;
@@ -228,7 +228,7 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector(
 	return sctp_auth_make_key_vector(
 			(struct sctp_random_param *)asoc->c.auth_random,
 			(struct sctp_chunks_param *)asoc->c.auth_chunks,
-			(sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, gfp);
+			(struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp);
 }
 
 /* Make a key vector based on peer's parameters */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 35bf5af124fc..3d506b2f6193 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -73,8 +73,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 		 * variables.  There are arrays that we encode directly
 		 * into parameters to make the rest of the operations easier.
 		 */
-		auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) +
-				sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
+		auth_hmacs = kzalloc(sizeof(*auth_hmacs) +
+				     sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
 		if (!auth_hmacs)
 			goto nomem;
 
-- 
cgit v1.2.3


From 27eac47b00789522ba00501b0838026e1ecb6f05 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Mon, 17 Jul 2017 11:35:54 +0200
Subject: net/unix: drop obsolete fd-recursion limits

All unix sockets now account inflight FDs to the respective sender.
This was introduced in:

    commit 712f4aad406bb1ed67f3f98d04c044191f0ff593
    Author: willy tarreau <w@1wt.eu>
    Date:   Sun Jan 10 07:54:56 2016 +0100

        unix: properly account for FDs passed over unix sockets

and further refined in:

    commit 415e3d3e90ce9e18727e8843ae343eda5a58fad6
    Author: Hannes Frederic Sowa <hannes@stressinduktion.org>
    Date:   Wed Feb 3 02:11:03 2016 +0100

        unix: correctly track in-flight fds in sending process user_struct

Hence, regardless of the stacking depth of FDs, the total number of
inflight FDs is limited, and accounted. There is no known way for a
local user to exceed those limits or exploit the accounting.

Furthermore, the GC logic is independent of the recursion/stacking depth
as well. It solely depends on the total number of inflight FDs,
regardless of their layout.

Lastly, the current `recursion_level' suffers a TOCTOU race, since it
checks and inherits depths only at queue time. If we consider `A<-B' to
mean `queue-B-on-A', the following sequence circumvents the recursion
level easily:

    A<-B
       B<-C
          C<-D
             ...
               Y<-Z

resulting in:

    A<-B<-C<-...<-Z

With all of this in mind, lets drop the recursion limit. It has no
additional security value, anymore. On the contrary, it randomly
confuses message brokers that try to forward file-descriptors, since
any sendmsg(2) call can fail spuriously with ETOOMANYREFS if a client
maliciously modifies the FD while inflight.

Cc: Alban Crequy <alban.crequy@collabora.co.uk>
Cc: Simon McVittie <simon.mcvittie@collabora.co.uk>
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Reviewed-by: Tom Gundersen <teg@jklm.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h |  1 -
 net/unix/af_unix.c    | 24 +-----------------------
 2 files changed, 1 insertion(+), 24 deletions(-)

(limited to 'include/net')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 678e4d6fa317..3b3194b2fc65 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -58,7 +58,6 @@ struct unix_sock {
 	struct list_head	link;
 	atomic_long_t		inflight;
 	spinlock_t		lock;
-	unsigned char		recursion_level;
 	unsigned long		gc_flags;
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7b52a380d710..5c53f22d62e8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1528,26 +1528,13 @@ static inline bool too_many_unix_fds(struct task_struct *p)
 	return false;
 }
 
-#define MAX_RECURSION_LEVEL 4
-
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
-	unsigned char max_level = 0;
 
 	if (too_many_unix_fds(current))
 		return -ETOOMANYREFS;
 
-	for (i = scm->fp->count - 1; i >= 0; i--) {
-		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
-
-		if (sk)
-			max_level = max(max_level,
-					unix_sk(sk)->recursion_level);
-	}
-	if (unlikely(max_level > MAX_RECURSION_LEVEL))
-		return -ETOOMANYREFS;
-
 	/*
 	 * Need to duplicate file references for the sake of garbage
 	 * collection.  Otherwise a socket in the fps might become a
@@ -1559,7 +1546,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 
 	for (i = scm->fp->count - 1; i >= 0; i--)
 		unix_inflight(scm->fp->user, scm->fp->fp[i]);
-	return max_level;
+	return 0;
 }
 
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1649,7 +1636,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct sk_buff *skb;
 	long timeo;
 	struct scm_cookie scm;
-	int max_level;
 	int data_len = 0;
 	int sk_locked;
 
@@ -1701,7 +1687,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	err = unix_scm_to_skb(&scm, skb, true);
 	if (err < 0)
 		goto out_free;
-	max_level = err + 1;
 
 	skb_put(skb, len - data_len);
 	skb->data_len = data_len;
@@ -1819,8 +1804,6 @@ restart_locked:
 		__net_timestamp(skb);
 	maybe_add_creds(skb, sock, other);
 	skb_queue_tail(&other->sk_receive_queue, skb);
-	if (max_level > unix_sk(other)->recursion_level)
-		unix_sk(other)->recursion_level = max_level;
 	unix_state_unlock(other);
 	other->sk_data_ready(other);
 	sock_put(other);
@@ -1855,7 +1838,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	int sent = 0;
 	struct scm_cookie scm;
 	bool fds_sent = false;
-	int max_level;
 	int data_len;
 
 	wait_for_unix_gc();
@@ -1905,7 +1887,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 			kfree_skb(skb);
 			goto out_err;
 		}
-		max_level = err + 1;
 		fds_sent = true;
 
 		skb_put(skb, size - data_len);
@@ -1925,8 +1906,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 
 		maybe_add_creds(skb, sock, other);
 		skb_queue_tail(&other->sk_receive_queue, skb);
-		if (max_level > unix_sk(other)->recursion_level)
-			unix_sk(other)->recursion_level = max_level;
 		unix_state_unlock(other);
 		other->sk_data_ready(other);
 		sent += size;
@@ -2324,7 +2303,6 @@ redo:
 		last_len = last ? last->len : 0;
 again:
 		if (skb == NULL) {
-			unix_sk(sk)->recursion_level = 0;
 			if (copied >= target)
 				goto unlock;
 
-- 
cgit v1.2.3


From b145425f269a17ed344d737f746b844dfac60c82 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 17 Jul 2017 02:56:10 -0700
Subject: inetpeer: remove AVL implementation in favor of RB tree

As discussed in Faro during Netfilter Workshop 2017, RB trees can be
used with RCU, using a seqlock.

Note that net/rxrpc/conn_service.c is already using this.

This patch converts inetpeer from AVL tree to RB tree, since it allows
to remove private AVL implementation in favor of shared RB code.

$ size net/ipv4/inetpeer.before net/ipv4/inetpeer.after
   text    data     bss     dec     hex filename
   3195      40     128    3363     d23 net/ipv4/inetpeer.before
   1562      24       0    1586     632 net/ipv4/inetpeer.after

The same technique can be used to speed up
net/netfilter/nft_set_rbtree.c (removing rwlock contention in fast path)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  11 +-
 net/ipv4/inetpeer.c    | 428 +++++++++++--------------------------------------
 2 files changed, 92 insertions(+), 347 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index f2a215fc78e4..950ed182f62f 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -33,18 +33,12 @@ struct inetpeer_addr {
 };
 
 struct inet_peer {
-	/* group together avl_left,avl_right,v4daddr to speedup lookups */
-	struct inet_peer __rcu	*avl_left, *avl_right;
+	struct rb_node		rb_node;
 	struct inetpeer_addr	daddr;
-	__u32			avl_height;
 
 	u32			metrics[RTAX_MAX];
 	u32			rate_tokens;	/* rate limiting for ICMP */
 	unsigned long		rate_last;
-	union {
-		struct list_head	gc_list;
-		struct rcu_head     gc_rcu;
-	};
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == 0), following field
 	 * is not available: rid
@@ -55,7 +49,6 @@ struct inet_peer {
 			atomic_t			rid;		/* Frag reception counter */
 		};
 		struct rcu_head         rcu;
-		struct inet_peer	*gc_next;
 	};
 
 	/* following fields might be frequently dirtied */
@@ -64,7 +57,7 @@ struct inet_peer {
 };
 
 struct inet_peer_base {
-	struct inet_peer __rcu	*root;
+	struct rb_root		rb_root;
 	seqlock_t		lock;
 	int			total;
 };
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index c5a117cc6619..337ad41bb80a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -33,7 +33,7 @@
  *  also be removed if the pool is overloaded i.e. if the total amount of
  *  entries is greater-or-equal than the threshold.
  *
- *  Node pool is organised as an AVL tree.
+ *  Node pool is organised as an RB tree.
  *  Such an implementation has been chosen not just for fun.  It's a way to
  *  prevent easy and efficient DoS attacks by creating hash collisions.  A huge
  *  amount of long living nodes in a single hash slot would significantly delay
@@ -45,7 +45,7 @@
  *      AND reference count being 0.
  *  3.  Global variable peer_total is modified under the pool lock.
  *  4.  struct inet_peer fields modification:
- *		avl_left, avl_right, avl_parent, avl_height: pool lock
+ *		rb_node: pool lock
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
  *		daddr: unchangeable
@@ -53,30 +53,15 @@
 
 static struct kmem_cache *peer_cachep __read_mostly;
 
-static LIST_HEAD(gc_list);
-static const int gc_delay = 60 * HZ;
-static struct delayed_work gc_work;
-static DEFINE_SPINLOCK(gc_lock);
-
-#define node_height(x) x->avl_height
-
-#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
-#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
-static const struct inet_peer peer_fake_node = {
-	.avl_left	= peer_avl_empty_rcu,
-	.avl_right	= peer_avl_empty_rcu,
-	.avl_height	= 0
-};
-
 void inet_peer_base_init(struct inet_peer_base *bp)
 {
-	bp->root = peer_avl_empty_rcu;
+	bp->rb_root = RB_ROOT;
 	seqlock_init(&bp->lock);
 	bp->total = 0;
 }
 EXPORT_SYMBOL_GPL(inet_peer_base_init);
 
-#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
+#define PEER_MAX_GC 32
 
 /* Exported for sysctl_net_ipv4.  */
 int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
@@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries m
 int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
 
-static void inetpeer_gc_worker(struct work_struct *work)
-{
-	struct inet_peer *p, *n, *c;
-	struct list_head list;
-
-	spin_lock_bh(&gc_lock);
-	list_replace_init(&gc_list, &list);
-	spin_unlock_bh(&gc_lock);
-
-	if (list_empty(&list))
-		return;
-
-	list_for_each_entry_safe(p, n, &list, gc_list) {
-
-		if (need_resched())
-			cond_resched();
-
-		c = rcu_dereference_protected(p->avl_left, 1);
-		if (c != peer_avl_empty) {
-			list_add_tail(&c->gc_list, &list);
-			p->avl_left = peer_avl_empty_rcu;
-		}
-
-		c = rcu_dereference_protected(p->avl_right, 1);
-		if (c != peer_avl_empty) {
-			list_add_tail(&c->gc_list, &list);
-			p->avl_right = peer_avl_empty_rcu;
-		}
-
-		n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
-
-		if (refcount_read(&p->refcnt) == 1) {
-			list_del(&p->gc_list);
-			kmem_cache_free(peer_cachep, p);
-		}
-	}
-
-	if (list_empty(&list))
-		return;
-
-	spin_lock_bh(&gc_lock);
-	list_splice(&list, &gc_list);
-	spin_unlock_bh(&gc_lock);
-
-	schedule_delayed_work(&gc_work, gc_delay);
-}
-
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
 {
@@ -153,225 +91,62 @@ void __init inet_initpeers(void)
 			sizeof(struct inet_peer),
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
-
-	INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
 }
 
-#define rcu_deref_locked(X, BASE)				\
-	rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
-
-/*
- * Called with local BH disabled and the pool lock held.
- */
-#define lookup(_daddr, _stack, _base)				\
-({								\
-	struct inet_peer *u;					\
-	struct inet_peer __rcu **v;				\
-								\
-	stackptr = _stack;					\
-	*stackptr++ = &_base->root;				\
-	for (u = rcu_deref_locked(_base->root, _base);		\
-	     u != peer_avl_empty;) {				\
-		int cmp = inetpeer_addr_cmp(_daddr, &u->daddr);	\
-		if (cmp == 0)					\
-			break;					\
-		if (cmp == -1)					\
-			v = &u->avl_left;			\
-		else						\
-			v = &u->avl_right;			\
-		*stackptr++ = v;				\
-		u = rcu_deref_locked(*v, _base);		\
-	}							\
-	u;							\
-})
-
-/*
- * Called with rcu_read_lock()
- * Because we hold no lock against a writer, its quite possible we fall
- * in an endless loop.
- * But every pointer we follow is guaranteed to be valid thanks to RCU.
- * We exit from this function if number of links exceeds PEER_MAXDEPTH
- */
-static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
-				    struct inet_peer_base *base)
+/* Called with rcu_read_lock() or base->lock held */
+static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
+				struct inet_peer_base *base,
+				unsigned int seq,
+				struct inet_peer *gc_stack[],
+				unsigned int *gc_cnt,
+				struct rb_node **parent_p,
+				struct rb_node ***pp_p)
 {
-	struct inet_peer *u = rcu_dereference(base->root);
-	int count = 0;
+	struct rb_node **pp, *parent;
+	struct inet_peer *p;
+
+	pp = &base->rb_root.rb_node;
+	parent = NULL;
+	while (*pp) {
+		int cmp;
 
-	while (u != peer_avl_empty) {
-		int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
+		parent = rcu_dereference_raw(*pp);
+		p = rb_entry(parent, struct inet_peer, rb_node);
+		cmp = inetpeer_addr_cmp(daddr, &p->daddr);
 		if (cmp == 0) {
-			/* Before taking a reference, check if this entry was
-			 * deleted (refcnt=0)
-			 */
-			if (!refcount_inc_not_zero(&u->refcnt)) {
-				u = NULL;
-			}
-			return u;
+			if (!refcount_inc_not_zero(&p->refcnt))
+				break;
+			return p;
+		}
+		if (gc_stack) {
+			if (*gc_cnt < PEER_MAX_GC)
+				gc_stack[(*gc_cnt)++] = p;
+		} else if (unlikely(read_seqretry(&base->lock, seq))) {
+			break;
 		}
 		if (cmp == -1)
-			u = rcu_dereference(u->avl_left);
+			pp = &(*pp)->rb_left;
 		else
-			u = rcu_dereference(u->avl_right);
-		if (unlikely(++count == PEER_MAXDEPTH))
-			break;
+			pp = &(*pp)->rb_right;
 	}
+	*parent_p = parent;
+	*pp_p = pp;
 	return NULL;
 }
 
-/* Called with local BH disabled and the pool lock held. */
-#define lookup_rightempty(start, base)				\
-({								\
-	struct inet_peer *u;					\
-	struct inet_peer __rcu **v;				\
-	*stackptr++ = &start->avl_left;				\
-	v = &start->avl_left;					\
-	for (u = rcu_deref_locked(*v, base);			\
-	     u->avl_right != peer_avl_empty_rcu;) {		\
-		v = &u->avl_right;				\
-		*stackptr++ = v;				\
-		u = rcu_deref_locked(*v, base);			\
-	}							\
-	u;							\
-})
-
-/* Called with local BH disabled and the pool lock held.
- * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.
- */
-static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
-			       struct inet_peer __rcu ***stackend,
-			       struct inet_peer_base *base)
-{
-	struct inet_peer __rcu **nodep;
-	struct inet_peer *node, *l, *r;
-	int lh, rh;
-
-	while (stackend > stack) {
-		nodep = *--stackend;
-		node = rcu_deref_locked(*nodep, base);
-		l = rcu_deref_locked(node->avl_left, base);
-		r = rcu_deref_locked(node->avl_right, base);
-		lh = node_height(l);
-		rh = node_height(r);
-		if (lh > rh + 1) { /* l: RH+2 */
-			struct inet_peer *ll, *lr, *lrl, *lrr;
-			int lrh;
-			ll = rcu_deref_locked(l->avl_left, base);
-			lr = rcu_deref_locked(l->avl_right, base);
-			lrh = node_height(lr);
-			if (lrh <= node_height(ll)) {	/* ll: RH+1 */
-				RCU_INIT_POINTER(node->avl_left, lr);	/* lr: RH or RH+1 */
-				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
-				node->avl_height = lrh + 1; /* RH+1 or RH+2 */
-				RCU_INIT_POINTER(l->avl_left, ll);       /* ll: RH+1 */
-				RCU_INIT_POINTER(l->avl_right, node);	/* node: RH+1 or RH+2 */
-				l->avl_height = node->avl_height + 1;
-				RCU_INIT_POINTER(*nodep, l);
-			} else { /* ll: RH, lr: RH+1 */
-				lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
-				lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
-				RCU_INIT_POINTER(node->avl_left, lrr);	/* lrr: RH or RH-1 */
-				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
-				node->avl_height = rh + 1; /* node: RH+1 */
-				RCU_INIT_POINTER(l->avl_left, ll);	/* ll: RH */
-				RCU_INIT_POINTER(l->avl_right, lrl);	/* lrl: RH or RH-1 */
-				l->avl_height = rh + 1;	/* l: RH+1 */
-				RCU_INIT_POINTER(lr->avl_left, l);	/* l: RH+1 */
-				RCU_INIT_POINTER(lr->avl_right, node);	/* node: RH+1 */
-				lr->avl_height = rh + 2;
-				RCU_INIT_POINTER(*nodep, lr);
-			}
-		} else if (rh > lh + 1) { /* r: LH+2 */
-			struct inet_peer *rr, *rl, *rlr, *rll;
-			int rlh;
-			rr = rcu_deref_locked(r->avl_right, base);
-			rl = rcu_deref_locked(r->avl_left, base);
-			rlh = node_height(rl);
-			if (rlh <= node_height(rr)) {	/* rr: LH+1 */
-				RCU_INIT_POINTER(node->avl_right, rl);	/* rl: LH or LH+1 */
-				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
-				node->avl_height = rlh + 1; /* LH+1 or LH+2 */
-				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH+1 */
-				RCU_INIT_POINTER(r->avl_left, node);	/* node: LH+1 or LH+2 */
-				r->avl_height = node->avl_height + 1;
-				RCU_INIT_POINTER(*nodep, r);
-			} else { /* rr: RH, rl: RH+1 */
-				rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
-				rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
-				RCU_INIT_POINTER(node->avl_right, rll);	/* rll: LH or LH-1 */
-				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
-				node->avl_height = lh + 1; /* node: LH+1 */
-				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH */
-				RCU_INIT_POINTER(r->avl_left, rlr);	/* rlr: LH or LH-1 */
-				r->avl_height = lh + 1;	/* r: LH+1 */
-				RCU_INIT_POINTER(rl->avl_right, r);	/* r: LH+1 */
-				RCU_INIT_POINTER(rl->avl_left, node);	/* node: LH+1 */
-				rl->avl_height = lh + 2;
-				RCU_INIT_POINTER(*nodep, rl);
-			}
-		} else {
-			node->avl_height = (lh > rh ? lh : rh) + 1;
-		}
-	}
-}
-
-/* Called with local BH disabled and the pool lock held. */
-#define link_to_pool(n, base)					\
-do {								\
-	n->avl_height = 1;					\
-	n->avl_left = peer_avl_empty_rcu;			\
-	n->avl_right = peer_avl_empty_rcu;			\
-	/* lockless readers can catch us now */			\
-	rcu_assign_pointer(**--stackptr, n);			\
-	peer_avl_rebalance(stack, stackptr, base);		\
-} while (0)
-
 static void inetpeer_free_rcu(struct rcu_head *head)
 {
 	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
 }
 
-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
-			     struct inet_peer __rcu **stack[PEER_MAXDEPTH])
-{
-	struct inet_peer __rcu ***stackptr, ***delp;
-
-	if (lookup(&p->daddr, stack, base) != p)
-		BUG();
-	delp = stackptr - 1; /* *delp[0] == p */
-	if (p->avl_left == peer_avl_empty_rcu) {
-		*delp[0] = p->avl_right;
-		--stackptr;
-	} else {
-		/* look for a node to insert instead of p */
-		struct inet_peer *t;
-		t = lookup_rightempty(p, base);
-		BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
-		**--stackptr = t->avl_left;
-		/* t is removed, t->daddr > x->daddr for any
-		 * x in p->avl_left subtree.
-		 * Put t in the old place of p. */
-		RCU_INIT_POINTER(*delp[0], t);
-		t->avl_left = p->avl_left;
-		t->avl_right = p->avl_right;
-		t->avl_height = p->avl_height;
-		BUG_ON(delp[1] != &p->avl_left);
-		delp[1] = &t->avl_left; /* was &p->avl_left */
-	}
-	peer_avl_rebalance(stack, stackptr, base);
-	base->total--;
-	call_rcu(&p->rcu, inetpeer_free_rcu);
-}
-
 /* perform garbage collect on all items stacked during a lookup */
-static int inet_peer_gc(struct inet_peer_base *base,
-			struct inet_peer __rcu **stack[PEER_MAXDEPTH],
-			struct inet_peer __rcu ***stackptr)
+static void inet_peer_gc(struct inet_peer_base *base,
+			 struct inet_peer *gc_stack[],
+			 unsigned int gc_cnt)
 {
-	struct inet_peer *p, *gchead = NULL;
+	struct inet_peer *p;
 	__u32 delta, ttl;
-	int cnt = 0;
+	int i;
 
 	if (base->total >= inet_peer_threshold)
 		ttl = 0; /* be aggressive */
@@ -379,43 +154,38 @@ static int inet_peer_gc(struct inet_peer_base *base,
 		ttl = inet_peer_maxttl
 				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
 					base->total / inet_peer_threshold * HZ;
-	stackptr--; /* last stack slot is peer_avl_empty */
-	while (stackptr > stack) {
-		stackptr--;
-		p = rcu_deref_locked(**stackptr, base);
-		if (refcount_read(&p->refcnt) == 1) {
-			smp_rmb();
-			delta = (__u32)jiffies - p->dtime;
-			if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) {
-				p->gc_next = gchead;
-				gchead = p;
-			}
-		}
+	for (i = 0; i < gc_cnt; i++) {
+		p = gc_stack[i];
+		delta = (__u32)jiffies - p->dtime;
+		if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
+			gc_stack[i] = NULL;
 	}
-	while ((p = gchead) != NULL) {
-		gchead = p->gc_next;
-		cnt++;
-		unlink_from_pool(p, base, stack);
+	for (i = 0; i < gc_cnt; i++) {
+		p = gc_stack[i];
+		if (p) {
+			rb_erase(&p->rb_node, &base->rb_root);
+			base->total--;
+			call_rcu(&p->rcu, inetpeer_free_rcu);
+		}
 	}
-	return cnt;
 }
 
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 			       const struct inetpeer_addr *daddr,
 			       int create)
 {
-	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-	struct inet_peer *p;
-	unsigned int sequence;
-	int invalidated, gccnt = 0;
+	struct inet_peer *p, *gc_stack[PEER_MAX_GC];
+	struct rb_node **pp, *parent;
+	unsigned int gc_cnt, seq;
+	int invalidated;
 
 	/* Attempt a lockless lookup first.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
 	rcu_read_lock();
-	sequence = read_seqbegin(&base->lock);
-	p = lookup_rcu(daddr, base);
-	invalidated = read_seqretry(&base->lock, sequence);
+	seq = read_seqbegin(&base->lock);
+	p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
+	invalidated = read_seqretry(&base->lock, seq);
 	rcu_read_unlock();
 
 	if (p)
@@ -428,36 +198,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 	/* retry an exact lookup, taking the lock before.
 	 * At least, nodes should be hot in our cache.
 	 */
+	parent = NULL;
 	write_seqlock_bh(&base->lock);
-relookup:
-	p = lookup(daddr, stack, base);
-	if (p != peer_avl_empty) {
-		refcount_inc(&p->refcnt);
-		write_sequnlock_bh(&base->lock);
-		return p;
-	}
-	if (!gccnt) {
-		gccnt = inet_peer_gc(base, stack, stackptr);
-		if (gccnt && create)
-			goto relookup;
-	}
-	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
-	if (p) {
-		p->daddr = *daddr;
-		refcount_set(&p->refcnt, 2);
-		atomic_set(&p->rid, 0);
-		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
-		p->rate_tokens = 0;
-		/* 60*HZ is arbitrary, but chosen enough high so that the first
-		 * calculation of tokens is at its maximum.
-		 */
-		p->rate_last = jiffies - 60*HZ;
-		INIT_LIST_HEAD(&p->gc_list);
 
-		/* Link the node. */
-		link_to_pool(p, base);
-		base->total++;
+	gc_cnt = 0;
+	p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
+	if (!p && create) {
+		p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
+		if (p) {
+			p->daddr = *daddr;
+			refcount_set(&p->refcnt, 2);
+			atomic_set(&p->rid, 0);
+			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+			p->rate_tokens = 0;
+			/* 60*HZ is arbitrary, but chosen enough high so that the first
+			 * calculation of tokens is at its maximum.
+			 */
+			p->rate_last = jiffies - 60*HZ;
+
+			rb_link_node(&p->rb_node, parent, pp);
+			rb_insert_color(&p->rb_node, &base->rb_root);
+			base->total++;
+		}
 	}
+	if (gc_cnt)
+		inet_peer_gc(base, gc_stack, gc_cnt);
 	write_sequnlock_bh(&base->lock);
 
 	return p;
@@ -467,8 +232,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
 void inet_putpeer(struct inet_peer *p)
 {
 	p->dtime = (__u32)jiffies;
-	smp_mb__before_atomic();
-	refcount_dec(&p->refcnt);
+
+	if (refcount_dec_and_test(&p->refcnt))
+		call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
 
@@ -513,30 +279,16 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
 
-static void inetpeer_inval_rcu(struct rcu_head *head)
-{
-	struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
-
-	spin_lock_bh(&gc_lock);
-	list_add_tail(&p->gc_list, &gc_list);
-	spin_unlock_bh(&gc_lock);
-
-	schedule_delayed_work(&gc_work, gc_delay);
-}
-
 void inetpeer_invalidate_tree(struct inet_peer_base *base)
 {
-	struct inet_peer *root;
-
-	write_seqlock_bh(&base->lock);
+	struct inet_peer *p, *n;
 
-	root = rcu_deref_locked(base->root, base);
-	if (root != peer_avl_empty) {
-		base->root = peer_avl_empty_rcu;
-		base->total = 0;
-		call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
+	rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) {
+		inet_putpeer(p);
+		cond_resched();
 	}
 
-	write_sequnlock_bh(&base->lock);
+	base->rb_root = RB_ROOT;
+	base->total = 0;
 }
 EXPORT_SYMBOL(inetpeer_invalidate_tree);
-- 
cgit v1.2.3


From 09c7570480f7544ffbf8e6db365208b0b0c154c6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 17 Jul 2017 13:57:26 +0200
Subject: xfrm: remove flow cache

After rcu conversions performance degradation in forward tests isn't that
noticeable anymore.

See next patch for some numbers.

A followup patcg could then also remove genid from the policies
as we do not cache bundles anymore.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h              |  34 ---
 include/net/flowcache.h         |  25 --
 include/net/netns/xfrm.h        |  11 -
 include/net/xfrm.h              |   8 -
 net/core/Makefile               |   1 -
 net/core/flow.c                 | 516 ----------------------------------------
 net/ipv4/xfrm4_policy.c         |   9 -
 net/ipv6/xfrm6_policy.c         |   9 -
 net/key/af_key.c                |   6 -
 net/xfrm/xfrm_device.c          |   2 -
 net/xfrm/xfrm_policy.c          | 108 +--------
 net/xfrm/xfrm_user.c            |   3 -
 security/selinux/include/xfrm.h |   4 +-
 13 files changed, 2 insertions(+), 734 deletions(-)
 delete mode 100644 include/net/flowcache.h
 delete mode 100644 net/core/flow.c

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index bae198b3039e..f3dc61b29bb5 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family)
 	return 0;
 }
 
-#define FLOW_DIR_IN	0
-#define FLOW_DIR_OUT	1
-#define FLOW_DIR_FWD	2
-
-struct net;
-struct sock;
-struct flow_cache_ops;
-
-struct flow_cache_object {
-	const struct flow_cache_ops *ops;
-};
-
-struct flow_cache_ops {
-	struct flow_cache_object *(*get)(struct flow_cache_object *);
-	int (*check)(struct flow_cache_object *);
-	void (*delete)(struct flow_cache_object *);
-};
-
-typedef struct flow_cache_object *(*flow_resolve_t)(
-		struct net *net, const struct flowi *key, u16 family,
-		u8 dir, struct flow_cache_object *oldobj, void *ctx);
-
-struct flow_cache_object *flow_cache_lookup(struct net *net,
-					    const struct flowi *key, u16 family,
-					    u8 dir, flow_resolve_t resolver,
-					    void *ctx);
-int flow_cache_init(struct net *net);
-void flow_cache_fini(struct net *net);
-void flow_cache_hp_init(void);
-
-void flow_cache_flush(struct net *net);
-void flow_cache_flush_deferred(struct net *net);
-extern atomic_t flow_cache_genid;
-
 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
 
 static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
diff --git a/include/net/flowcache.h b/include/net/flowcache.h
deleted file mode 100644
index 51eb971e8973..000000000000
--- a/include/net/flowcache.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _NET_FLOWCACHE_H
-#define _NET_FLOWCACHE_H
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/notifier.h>
-
-struct flow_cache_percpu {
-	struct hlist_head		*hash_table;
-	unsigned int			hash_count;
-	u32				hash_rnd;
-	int				hash_rnd_recalc;
-	struct tasklet_struct		flush_tasklet;
-};
-
-struct flow_cache {
-	u32				hash_shift;
-	struct flow_cache_percpu __percpu *percpu;
-	struct hlist_node		node;
-	unsigned int			low_watermark;
-	unsigned int			high_watermark;
-	struct timer_list		rnd_timer;
-};
-#endif	/* _NET_FLOWCACHE_H */
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 27bb9633c69d..611521646dd4 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -6,7 +6,6 @@
 #include <linux/workqueue.h>
 #include <linux/xfrm.h>
 #include <net/dst_ops.h>
-#include <net/flowcache.h>
 
 struct ctl_table_header;
 
@@ -73,16 +72,6 @@ struct netns_xfrm {
 	spinlock_t xfrm_state_lock;
 	spinlock_t xfrm_policy_lock;
 	struct mutex xfrm_cfg_mutex;
-
-	/* flow cache part */
-	struct flow_cache	flow_cache_global;
-	atomic_t		flow_cache_genid;
-	struct list_head	flow_cache_gc_list;
-	atomic_t		flow_cache_gc_count;
-	spinlock_t		flow_cache_gc_lock;
-	struct work_struct	flow_cache_gc_work;
-	struct work_struct	flow_cache_flush_work;
-	struct mutex		flow_flush_sem;
 };
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c0916ab18d32..e0feba2ce76a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -563,7 +563,6 @@ struct xfrm_policy {
 	refcount_t		refcnt;
 	struct timer_list	timer;
 
-	struct flow_cache_object flo;
 	atomic_t		genid;
 	u32			priority;
 	u32			index;
@@ -978,7 +977,6 @@ struct xfrm_dst {
 		struct rt6_info		rt6;
 	} u;
 	struct dst_entry *route;
-	struct flow_cache_object flo;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	int num_pols, num_xfrms;
 	u32 xfrm_genid;
@@ -1226,9 +1224,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
 	}
 }
 
-void xfrm_garbage_collect(struct net *net);
-void xfrm_garbage_collect_deferred(struct net *net);
-
 #else
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
@@ -1263,9 +1258,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
 {
 	return 1;
 }
-static inline void xfrm_garbage_collect(struct net *net)
-{
-}
 #endif
 
 static __inline__
diff --git a/net/core/Makefile b/net/core/Makefile
index 79f9479e9658..d501c4278015 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,6 @@ obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
 			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
 
-obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/flow.c b/net/core/flow.c
deleted file mode 100644
index f7f5d1932a27..000000000000
--- a/net/core/flow.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/* flow.c: Generic flow cache.
- *
- * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/percpu.h>
-#include <linux/bitops.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/mutex.h>
-#include <net/flow.h>
-#include <linux/atomic.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-struct flow_cache_entry {
-	union {
-		struct hlist_node	hlist;
-		struct list_head	gc_list;
-	} u;
-	struct net			*net;
-	u16				family;
-	u8				dir;
-	u32				genid;
-	struct flowi			key;
-	struct flow_cache_object	*object;
-};
-
-struct flow_flush_info {
-	struct flow_cache		*cache;
-	atomic_t			cpuleft;
-	struct completion		completion;
-};
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-#define flow_cache_hash_size(cache)	(1U << (cache)->hash_shift)
-#define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
-
-static void flow_cache_new_hashrnd(unsigned long arg)
-{
-	struct flow_cache *fc = (void *) arg;
-	int i;
-
-	for_each_possible_cpu(i)
-		per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
-
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-}
-
-static int flow_entry_valid(struct flow_cache_entry *fle,
-				struct netns_xfrm *xfrm)
-{
-	if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
-		return 0;
-	if (fle->object && !fle->object->ops->check(fle->object))
-		return 0;
-	return 1;
-}
-
-static void flow_entry_kill(struct flow_cache_entry *fle,
-				struct netns_xfrm *xfrm)
-{
-	if (fle->object)
-		fle->object->ops->delete(fle->object);
-	kmem_cache_free(flow_cachep, fle);
-}
-
-static void flow_cache_gc_task(struct work_struct *work)
-{
-	struct list_head gc_list;
-	struct flow_cache_entry *fce, *n;
-	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-						flow_cache_gc_work);
-
-	INIT_LIST_HEAD(&gc_list);
-	spin_lock_bh(&xfrm->flow_cache_gc_lock);
-	list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
-	spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-
-	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
-		flow_entry_kill(fce, xfrm);
-		atomic_dec(&xfrm->flow_cache_gc_count);
-	}
-}
-
-static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
-				     unsigned int deleted,
-				     struct list_head *gc_list,
-				     struct netns_xfrm *xfrm)
-{
-	if (deleted) {
-		atomic_add(deleted, &xfrm->flow_cache_gc_count);
-		fcp->hash_count -= deleted;
-		spin_lock_bh(&xfrm->flow_cache_gc_lock);
-		list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
-		spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-		schedule_work(&xfrm->flow_cache_gc_work);
-	}
-}
-
-static void __flow_cache_shrink(struct flow_cache *fc,
-				struct flow_cache_percpu *fcp,
-				unsigned int shrink_to)
-{
-	struct flow_cache_entry *fle;
-	struct hlist_node *tmp;
-	LIST_HEAD(gc_list);
-	unsigned int deleted = 0;
-	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-						flow_cache_global);
-	unsigned int i;
-
-	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		unsigned int saved = 0;
-
-		hlist_for_each_entry_safe(fle, tmp,
-					  &fcp->hash_table[i], u.hlist) {
-			if (saved < shrink_to &&
-			    flow_entry_valid(fle, xfrm)) {
-				saved++;
-			} else {
-				deleted++;
-				hlist_del(&fle->u.hlist);
-				list_add_tail(&fle->u.gc_list, &gc_list);
-			}
-		}
-	}
-
-	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-}
-
-static void flow_cache_shrink(struct flow_cache *fc,
-			      struct flow_cache_percpu *fcp)
-{
-	unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
-
-	__flow_cache_shrink(fc, fcp, shrink_to);
-}
-
-static void flow_new_hash_rnd(struct flow_cache *fc,
-			      struct flow_cache_percpu *fcp)
-{
-	get_random_bytes(&fcp->hash_rnd, sizeof(u32));
-	fcp->hash_rnd_recalc = 0;
-	__flow_cache_shrink(fc, fcp, 0);
-}
-
-static u32 flow_hash_code(struct flow_cache *fc,
-			  struct flow_cache_percpu *fcp,
-			  const struct flowi *key,
-			  unsigned int keysize)
-{
-	const u32 *k = (const u32 *) key;
-	const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
-
-	return jhash2(k, length, fcp->hash_rnd)
-		& (flow_cache_hash_size(fc) - 1);
-}
-
-/* I hear what you're saying, use memcmp.  But memcmp cannot make
- * important assumptions that we can here, such as alignment.
- */
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
-			    unsigned int keysize)
-{
-	const flow_compare_t *k1, *k1_lim, *k2;
-
-	k1 = (const flow_compare_t *) key1;
-	k1_lim = k1 + keysize;
-
-	k2 = (const flow_compare_t *) key2;
-
-	do {
-		if (*k1++ != *k2++)
-			return 1;
-	} while (k1 < k1_lim);
-
-	return 0;
-}
-
-struct flow_cache_object *
-flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
-		  flow_resolve_t resolver, void *ctx)
-{
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle, *tfle;
-	struct flow_cache_object *flo;
-	unsigned int keysize;
-	unsigned int hash;
-
-	local_bh_disable();
-	fcp = this_cpu_ptr(fc->percpu);
-
-	fle = NULL;
-	flo = NULL;
-
-	keysize = flow_key_size(family);
-	if (!keysize)
-		goto nocache;
-
-	/* Packet really early in init?  Making flow_cache_init a
-	 * pre-smp initcall would solve this.  --RR */
-	if (!fcp->hash_table)
-		goto nocache;
-
-	if (fcp->hash_rnd_recalc)
-		flow_new_hash_rnd(fc, fcp);
-
-	hash = flow_hash_code(fc, fcp, key, keysize);
-	hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
-		if (tfle->net == net &&
-		    tfle->family == family &&
-		    tfle->dir == dir &&
-		    flow_key_compare(key, &tfle->key, keysize) == 0) {
-			fle = tfle;
-			break;
-		}
-	}
-
-	if (unlikely(!fle)) {
-		if (fcp->hash_count > fc->high_watermark)
-			flow_cache_shrink(fc, fcp);
-
-		if (atomic_read(&net->xfrm.flow_cache_gc_count) >
-		    2 * num_online_cpus() * fc->high_watermark) {
-			flo = ERR_PTR(-ENOBUFS);
-			goto ret_object;
-		}
-
-		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
-		if (fle) {
-			fle->net = net;
-			fle->family = family;
-			fle->dir = dir;
-			memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
-			fle->object = NULL;
-			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
-			fcp->hash_count++;
-		}
-	} else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
-		flo = fle->object;
-		if (!flo)
-			goto ret_object;
-		flo = flo->ops->get(flo);
-		if (flo)
-			goto ret_object;
-	} else if (fle->object) {
-	        flo = fle->object;
-	        flo->ops->delete(flo);
-	        fle->object = NULL;
-	}
-
-nocache:
-	flo = NULL;
-	if (fle) {
-		flo = fle->object;
-		fle->object = NULL;
-	}
-	flo = resolver(net, key, family, dir, flo, ctx);
-	if (fle) {
-		fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
-		if (!IS_ERR(flo))
-			fle->object = flo;
-		else
-			fle->genid--;
-	} else {
-		if (!IS_ERR_OR_NULL(flo))
-			flo->ops->delete(flo);
-	}
-ret_object:
-	local_bh_enable();
-	return flo;
-}
-EXPORT_SYMBOL(flow_cache_lookup);
-
-static void flow_cache_flush_tasklet(unsigned long data)
-{
-	struct flow_flush_info *info = (void *)data;
-	struct flow_cache *fc = info->cache;
-	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle;
-	struct hlist_node *tmp;
-	LIST_HEAD(gc_list);
-	unsigned int deleted = 0;
-	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-						flow_cache_global);
-	unsigned int i;
-
-	fcp = this_cpu_ptr(fc->percpu);
-	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		hlist_for_each_entry_safe(fle, tmp,
-					  &fcp->hash_table[i], u.hlist) {
-			if (flow_entry_valid(fle, xfrm))
-				continue;
-
-			deleted++;
-			hlist_del(&fle->u.hlist);
-			list_add_tail(&fle->u.gc_list, &gc_list);
-		}
-	}
-
-	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-
-	if (atomic_dec_and_test(&info->cpuleft))
-		complete(&info->completion);
-}
-
-/*
- * Return whether a cpu needs flushing.  Conservatively, we assume
- * the presence of any entries means the core may require flushing,
- * since the flow_cache_ops.check() function may assume it's running
- * on the same core as the per-cpu cache component.
- */
-static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
-{
-	struct flow_cache_percpu *fcp;
-	unsigned int i;
-
-	fcp = per_cpu_ptr(fc->percpu, cpu);
-	for (i = 0; i < flow_cache_hash_size(fc); i++)
-		if (!hlist_empty(&fcp->hash_table[i]))
-			return 0;
-	return 1;
-}
-
-static void flow_cache_flush_per_cpu(void *data)
-{
-	struct flow_flush_info *info = data;
-	struct tasklet_struct *tasklet;
-
-	tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet;
-	tasklet->data = (unsigned long)info;
-	tasklet_schedule(tasklet);
-}
-
-void flow_cache_flush(struct net *net)
-{
-	struct flow_flush_info info;
-	cpumask_var_t mask;
-	int i, self;
-
-	/* Track which cpus need flushing to avoid disturbing all cores. */
-	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
-		return;
-	cpumask_clear(mask);
-
-	/* Don't want cpus going down or up during this. */
-	get_online_cpus();
-	mutex_lock(&net->xfrm.flow_flush_sem);
-	info.cache = &net->xfrm.flow_cache_global;
-	for_each_online_cpu(i)
-		if (!flow_cache_percpu_empty(info.cache, i))
-			cpumask_set_cpu(i, mask);
-	atomic_set(&info.cpuleft, cpumask_weight(mask));
-	if (atomic_read(&info.cpuleft) == 0)
-		goto done;
-
-	init_completion(&info.completion);
-
-	local_bh_disable();
-	self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
-	on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
-	if (self)
-		flow_cache_flush_tasklet((unsigned long)&info);
-	local_bh_enable();
-
-	wait_for_completion(&info.completion);
-
-done:
-	mutex_unlock(&net->xfrm.flow_flush_sem);
-	put_online_cpus();
-	free_cpumask_var(mask);
-}
-
-static void flow_cache_flush_task(struct work_struct *work)
-{
-	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-						flow_cache_flush_work);
-	struct net *net = container_of(xfrm, struct net, xfrm);
-
-	flow_cache_flush(net);
-}
-
-void flow_cache_flush_deferred(struct net *net)
-{
-	schedule_work(&net->xfrm.flow_cache_flush_work);
-}
-
-static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
-{
-	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-	unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
-
-	if (!fcp->hash_table) {
-		fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
-		if (!fcp->hash_table) {
-			pr_err("NET: failed to allocate flow cache sz %u\n", sz);
-			return -ENOMEM;
-		}
-		fcp->hash_rnd_recalc = 1;
-		fcp->hash_count = 0;
-		tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
-	}
-	return 0;
-}
-
-static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
-{
-	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-
-	return flow_cache_cpu_prepare(fc, cpu);
-}
-
-static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
-{
-	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-
-	__flow_cache_shrink(fc, fcp, 0);
-	return 0;
-}
-
-int flow_cache_init(struct net *net)
-{
-	int i;
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-	if (!flow_cachep)
-		flow_cachep = kmem_cache_create("flow_cache",
-						sizeof(struct flow_cache_entry),
-						0, SLAB_PANIC, NULL);
-	spin_lock_init(&net->xfrm.flow_cache_gc_lock);
-	INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
-	INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
-	INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
-	mutex_init(&net->xfrm.flow_flush_sem);
-	atomic_set(&net->xfrm.flow_cache_gc_count, 0);
-
-	fc->hash_shift = 10;
-	fc->low_watermark = 2 * flow_cache_hash_size(fc);
-	fc->high_watermark = 4 * flow_cache_hash_size(fc);
-
-	fc->percpu = alloc_percpu(struct flow_cache_percpu);
-	if (!fc->percpu)
-		return -ENOMEM;
-
-	if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
-		goto err;
-
-	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
-		    (unsigned long) fc);
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-
-	return 0;
-
-err:
-	for_each_possible_cpu(i) {
-		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-		kfree(fcp->hash_table);
-		fcp->hash_table = NULL;
-	}
-
-	free_percpu(fc->percpu);
-	fc->percpu = NULL;
-
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(flow_cache_init);
-
-void flow_cache_fini(struct net *net)
-{
-	int i;
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-	del_timer_sync(&fc->rnd_timer);
-
-	cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
-
-	for_each_possible_cpu(i) {
-		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-		kfree(fcp->hash_table);
-		fcp->hash_table = NULL;
-	}
-
-	free_percpu(fc->percpu);
-	fc->percpu = NULL;
-}
-EXPORT_SYMBOL(flow_cache_fini);
-
-void __init flow_cache_hp_init(void)
-{
-	int ret;
-
-	ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
-				      "net/flow:prepare",
-				      flow_cache_cpu_up_prep,
-				      flow_cache_cpu_dead);
-	WARN_ON(ret < 0);
-}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 19455a5fc328..4aefb149fe0a 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 	fl4->flowi4_tos = iph->tos;
 }
 
-static inline int xfrm4_garbage_collect(struct dst_ops *ops)
-{
-	struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
-
-	xfrm_garbage_collect_deferred(net);
-	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
-}
-
 static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -259,7 +251,6 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm4_dst_ops_template = {
 	.family =		AF_INET,
-	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
 	.redirect =		xfrm4_redirect,
 	.cow_metrics =		dst_cow_metrics_generic,
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ae30dc4973e8..f44b25a48478 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	}
 }
 
-static inline int xfrm6_garbage_collect(struct dst_ops *ops)
-{
-	struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
-
-	xfrm_garbage_collect_deferred(net);
-	return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
-}
-
 static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -279,7 +271,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm6_dst_ops_template = {
 	.family =		AF_INET6,
-	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
 	.redirect =		xfrm6_redirect,
 	.cow_metrics =		dst_cow_metrics_generic,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ca9d3ae665e7..10d7133e4fe9 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 
 out:
 	xfrm_pol_put(xp);
-	if (err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 
 out:
 	xfrm_pol_put(xp);
-	if (delete && err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
 	int err, err2;
 
 	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
-	if (!err)
-		xfrm_garbage_collect(net);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - old silent behavior */
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5f7e8bfa0c2d..1f9a079e08b0 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -175,8 +175,6 @@ static int xfrm_dev_down(struct net_device *dev)
 	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-	xfrm_garbage_collect(dev_net(dev));
-
 	return NOTIFY_DONE;
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 145d2395f3c0..0f1db4c18b22 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -246,36 +246,6 @@ expired:
 	xfrm_pol_put(xp);
 }
 
-static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
-{
-	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-	if (unlikely(pol->walk.dead))
-		flo = NULL;
-	else
-		xfrm_pol_hold(pol);
-
-	return flo;
-}
-
-static int xfrm_policy_flo_check(struct flow_cache_object *flo)
-{
-	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-	return !pol->walk.dead;
-}
-
-static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
-{
-	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
-}
-
-static const struct flow_cache_ops xfrm_policy_fc_ops = {
-	.get = xfrm_policy_flo_get,
-	.check = xfrm_policy_flo_check,
-	.delete = xfrm_policy_flo_delete,
-};
-
 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
  * SPD calls.
  */
@@ -298,7 +268,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 				(unsigned long)policy);
 		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
 			    (unsigned long)policy);
-		policy->flo.ops = &xfrm_policy_fc_ops;
 	}
 	return policy;
 }
@@ -798,7 +767,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	else
 		hlist_add_head(&policy->bydst, chain);
 	__xfrm_policy_link(policy, dir);
-	atomic_inc(&net->xfrm.flow_cache_genid);
 
 	/* After previous checking, family can either be AF_INET or AF_INET6 */
 	if (policy->family == AF_INET)
@@ -1490,58 +1458,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family)
 	return tos;
 }
 
-static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	if (xdst->route == NULL) {
-		/* Dummy bundle - if it has xfrms we were not
-		 * able to build bundle as template resolution failed.
-		 * It means we need to try again resolving. */
-		if (xdst->num_xfrms > 0)
-			return NULL;
-	} else if (dst->flags & DST_XFRM_QUEUE) {
-		return NULL;
-	} else {
-		/* Real bundle */
-		if (stale_bundle(dst))
-			return NULL;
-	}
-
-	dst_hold(dst);
-	return flo;
-}
-
-static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	if (!xdst->route)
-		return 0;
-	if (stale_bundle(dst))
-		return 0;
-
-	return 1;
-}
-
-static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-	dst->obsolete = DST_OBSOLETE_DEAD;
-	dst_release_immediate(dst);
-}
-
-static const struct flow_cache_ops xfrm_bundle_fc_ops = {
-	.get = xfrm_bundle_flo_get,
-	.check = xfrm_bundle_flo_check,
-	.delete = xfrm_bundle_flo_delete,
-};
-
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
 	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1569,7 +1485,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 		struct dst_entry *dst = &xdst->u.dst;
 
 		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
-		xdst->flo.ops = &xfrm_bundle_fc_ops;
 	} else
 		xdst = ERR_PTR(-ENOBUFS);
 
@@ -2521,11 +2436,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 	 * notice.  That's what we are validating here via the
 	 * stale_bundle() check.
 	 *
-	 * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will
-	 * be marked on it.
 	 * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will
 	 * be marked on it.
-	 * Both will force stable_bundle() to fail on any xdst bundle with
+	 * This will force stale_bundle() to fail on any xdst bundle with
 	 * this dst linked in it.
 	 */
 	if (dst->obsolete < 0 && !stale_bundle(dst))
@@ -2565,18 +2478,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
-void xfrm_garbage_collect(struct net *net)
-{
-	flow_cache_flush(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect);
-
-void xfrm_garbage_collect_deferred(struct net *net)
-{
-	flow_cache_flush_deferred(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect_deferred);
-
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -2914,14 +2815,9 @@ static int __net_init xfrm_net_init(struct net *net)
 	rv = xfrm_sysctl_init(net);
 	if (rv < 0)
 		goto out_sysctl;
-	rv = flow_cache_init(net);
-	if (rv < 0)
-		goto out;
 
 	return 0;
 
-out:
-	xfrm_sysctl_fini(net);
 out_sysctl:
 	xfrm_policy_fini(net);
 out_policy:
@@ -2934,7 +2830,6 @@ out_statistics:
 
 static void __net_exit xfrm_net_exit(struct net *net)
 {
-	flow_cache_fini(net);
 	xfrm_sysctl_fini(net);
 	xfrm_policy_fini(net);
 	xfrm_state_fini(net);
@@ -2948,7 +2843,6 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
-	flow_cache_hp_init();
 	register_pernet_subsys(&xfrm_net_ops);
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2be4c6af008a..1b539b7dcfab 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 out:
 	xfrm_pol_put(xp);
-	if (delete && err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			return 0;
 		return err;
 	}
-	xfrm_garbage_collect(net);
 
 	c.data.type = type;
 	c.event = nlh->nlmsg_type;
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 1450f85b946d..36a7ce9e11ff 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void)
 	struct net *net;
 
 	rtnl_lock();
-	for_each_net(net) {
-		atomic_inc(&net->xfrm.flow_cache_genid);
+	for_each_net(net)
 		rt_genid_bump_all(net);
-	}
 	rtnl_unlock();
 }
 #else
-- 
cgit v1.2.3


From ec30d78c14a813db39a647b6a348b4286ba4abf5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 17 Jul 2017 13:57:27 +0200
Subject: xfrm: add xdst pcpu cache

retain last used xfrm_dst in a pcpu cache.
On next request, reuse this dst if the policies are the same.

The cache will not help with strict RR workloads as there is no hit.

The cache packet-path part is reasonably small, the notifier part is
needed so we do not add long hangs when a device is dismantled but some
pcpu xdst still holds a reference, there are also calls to the flush
operation when userspace deletes SAs so modules can be removed
(there is no hit.

We need to run the dst_release on the correct cpu to avoid races with
packet path.  This is done by adding a work_struct for each cpu and then
doing the actual test/release on each affected cpu via schedule_work_on().

Test results using 4 network namespaces and null encryption:

ns1           ns2          -> ns3           -> ns4
netperf -> xfrm/null enc   -> xfrm/null dec -> netserver

what                    TCP_STREAM      UDP_STREAM      UDP_RR
Flow cache:             14644.61        294.35          327231.64
No flow cache:		14349.81	242.64		202301.72
Pcpu cache:		14629.70	292.21		205595.22

UDP tests used 64byte packets, tests ran for one minute each,
value is average over ten iterations.

'Flow cache' is 'net-next', 'No flow cache' is net-next plus this
series but without this patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |   1 +
 net/xfrm/xfrm_device.c |   2 +
 net/xfrm/xfrm_policy.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++-
 net/xfrm/xfrm_state.c  |   5 +-
 4 files changed, 132 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e0feba2ce76a..afb4929d7232 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
 		      const struct km_event *c);
+void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 1f9a079e08b0..5cd7a244e88d 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev)
 
 static int xfrm_dev_unregister(struct net_device *dev)
 {
+	xfrm_policy_cache_flush();
 	return NOTIFY_DONE;
 }
 
@@ -175,6 +176,7 @@ static int xfrm_dev_down(struct net_device *dev)
 	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
+	xfrm_policy_cache_flush();
 	return NOTIFY_DONE;
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0f1db4c18b22..06c3bf7ab86b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -24,6 +24,7 @@
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/cache.h>
+#include <linux/cpu.h>
 #include <linux/audit.h>
 #include <net/dst.h>
 #include <net/flow.h>
@@ -44,6 +45,8 @@ struct xfrm_flo {
 	u8 flags;
 };
 
+static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
+static struct work_struct *xfrm_pcpu_work __read_mostly;
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
@@ -972,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 	}
 	if (!cnt)
 		err = -ESRCH;
+	else
+		xfrm_policy_cache_flush();
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	return err;
@@ -1700,6 +1705,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 
 }
 
+static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
+{
+	this_cpu_write(xfrm_last_dst, xdst);
+	if (old)
+		dst_release(&old->u.dst);
+}
+
+static void __xfrm_pcpu_work_fn(void)
+{
+	struct xfrm_dst *old;
+
+	old = this_cpu_read(xfrm_last_dst);
+	if (old && !xfrm_bundle_ok(old))
+		xfrm_last_dst_update(NULL, old);
+}
+
+static void xfrm_pcpu_work_fn(struct work_struct *work)
+{
+	local_bh_disable();
+	rcu_read_lock();
+	__xfrm_pcpu_work_fn();
+	rcu_read_unlock();
+	local_bh_enable();
+}
+
+void xfrm_policy_cache_flush(void)
+{
+	struct xfrm_dst *old;
+	bool found = 0;
+	int cpu;
+
+	local_bh_disable();
+	rcu_read_lock();
+	for_each_possible_cpu(cpu) {
+		old = per_cpu(xfrm_last_dst, cpu);
+		if (old && !xfrm_bundle_ok(old)) {
+			if (smp_processor_id() == cpu) {
+				__xfrm_pcpu_work_fn();
+				continue;
+			}
+			found = true;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	local_bh_enable();
+
+	if (!found)
+		return;
+
+	get_online_cpus();
+
+	for_each_possible_cpu(cpu) {
+		bool bundle_release;
+
+		rcu_read_lock();
+		old = per_cpu(xfrm_last_dst, cpu);
+		bundle_release = old && !xfrm_bundle_ok(old);
+		rcu_read_unlock();
+
+		if (!bundle_release)
+			continue;
+
+		if (cpu_online(cpu)) {
+			schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
+			continue;
+		}
+
+		rcu_read_lock();
+		old = per_cpu(xfrm_last_dst, cpu);
+		if (old && !xfrm_bundle_ok(old)) {
+			per_cpu(xfrm_last_dst, cpu) = NULL;
+			dst_release(&old->u.dst);
+		}
+		rcu_read_unlock();
+	}
+
+	put_online_cpus();
+}
+
+static bool xfrm_pol_dead(struct xfrm_dst *xdst)
+{
+	unsigned int num_pols = xdst->num_pols;
+	unsigned int pol_dead = 0, i;
+
+	for (i = 0; i < num_pols; i++)
+		pol_dead |= xdst->pols[i]->walk.dead;
+
+	/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
+	if (pol_dead)
+		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
+
+	return pol_dead;
+}
+
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 			       const struct flowi *fl, u16 family,
@@ -1707,10 +1808,22 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 {
 	struct net *net = xp_net(pols[0]);
 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct xfrm_dst *xdst, *old;
 	struct dst_entry *dst;
-	struct xfrm_dst *xdst;
 	int err;
 
+	xdst = this_cpu_read(xfrm_last_dst);
+	if (xdst &&
+	    xdst->u.dst.dev == dst_orig->dev &&
+	    xdst->num_pols == num_pols &&
+	    !xfrm_pol_dead(xdst) &&
+	    memcmp(xdst->pols, pols,
+		   sizeof(struct xfrm_policy *) * num_pols) == 0) {
+		dst_hold(&xdst->u.dst);
+		return xdst;
+	}
+
+	old = xdst;
 	/* Try to instantiate a bundle */
 	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
 	if (err <= 0) {
@@ -1731,6 +1844,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
 
+	atomic_set(&xdst->u.dst.__refcnt, 2);
+	xfrm_last_dst_update(xdst, old);
+
 	return xdst;
 }
 
@@ -2843,6 +2959,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
+	int i;
+
+	xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
+				       GFP_KERNEL);
+	BUG_ON(!xfrm_pcpu_work);
+
+	for (i = 0; i < NR_CPUS; i++)
+		INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
+
 	register_pernet_subsys(&xfrm_net_ops);
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 6c0956d10db6..82cbbce69b79 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -724,9 +724,10 @@ restart:
 			}
 		}
 	}
-	if (cnt)
+	if (cnt) {
 		err = 0;
-
+		xfrm_policy_cache_flush();
+	}
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	return err;
-- 
cgit v1.2.3


From bb4d991a28cc86a2dfbeefeff32911ca9f779c18 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 19 Jul 2017 15:41:26 -0700
Subject: tcp: adjust tail loss probe timeout

This patch adjusts the timeout formula to schedule the TCP loss probe
(TLP). The previous formula uses 2*SRTT or 1.5*RTT + DelayACKMax if
only one packet is in flight. It keeps a lower bound of 10 msec which
is too large for short RTT connections (e.g. within a data-center).
The new formula = 2*RTT + (inflight == 1 ? 200ms : 2ticks) which
performs better for short and fast connections.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h       |  3 +--
 net/ipv4/tcp_output.c   | 17 ++++++++++-------
 net/ipv4/tcp_recovery.c |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70483296157f..4f056ea79df2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #endif
 #define TCP_RTO_MAX	((unsigned)(120*HZ))
 #define TCP_RTO_MIN	((unsigned)(HZ/5))
+#define TCP_TIMEOUT_MIN	(2U) /* Min timeout for TCP timers in jiffies */
 #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC6298 2.1 initial RTO value	*/
 #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value, now
 						 * used as a fallback RTO for the
@@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
 					                 * for local resources.
 					                 */
-#define TCP_REO_TIMEOUT_MIN	(2000) /* Min RACK reordering timeout in usec */
-
 #define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
 #define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/
 #define TCP_KEEPALIVE_INTVL	(75*HZ)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e985dea1dd2..886d874775df 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2377,7 +2377,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 timeout, tlp_time_stamp, rto_time_stamp;
-	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
 	/* No consecutive loss probes. */
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
@@ -2406,15 +2405,19 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	     tcp_send_head(sk))
 		return false;
 
-	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+	/* Probe timeout is 2*rtt. Add minimum RTO to account
 	 * for delayed ack when there's one outstanding packet. If no RTT
 	 * sample is available then probe after TCP_TIMEOUT_INIT.
 	 */
-	timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
-	if (tp->packets_out == 1)
-		timeout = max_t(u32, timeout,
-				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
-	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+	if (tp->srtt_us) {
+		timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+		if (tp->packets_out == 1)
+			timeout += TCP_RTO_MIN;
+		else
+			timeout += TCP_TIMEOUT_MIN;
+	} else {
+		timeout = TCP_TIMEOUT_INIT;
+	}
 
 	/* If RTO is shorter, just schedule TLP in its place. */
 	tlp_time_stamp = tcp_jiffies32 + timeout;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fe9a493d0208..449cd914d58e 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk)
 	tp->rack.advanced = 0;
 	tcp_rack_detect_loss(sk, &timeout);
 	if (timeout) {
-		timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
+		timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
 					  timeout, inet_csk(sk)->icsk_rto);
 	}
-- 
cgit v1.2.3


From e7d53ad3239de636ea478fc003d3652b49b8e593 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Tue, 18 Jul 2017 16:23:56 -0400
Subject: net: dsa: unexport dsa_is_port_initialized

The dsa_is_port_initialized helper is only used by dsa_switch_resume and
dsa_switch_suspend, if CONFIG_PM_SLEEP is enabled. Make it static to
dsa.c.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 5 -----
 net/dsa/dsa.c     | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 58969b9a090c..88da272d20d0 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -256,11 +256,6 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p)
 	return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p);
 }
 
-static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
-{
-	return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
-}
-
 static inline u8 dsa_upstream_port(struct dsa_switch *ds)
 {
 	struct dsa_switch_tree *dst = ds->dst;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 416ac4ef9ba9..a55e2e4087a4 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -220,6 +220,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 }
 
 #ifdef CONFIG_PM_SLEEP
+static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
+{
+	return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
+}
+
 int dsa_switch_suspend(struct dsa_switch *ds)
 {
 	int i, ret = 0;
-- 
cgit v1.2.3


From 9f08ea848117ab521efcfd3e004d8e1a0edc640c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 18 Jul 2017 20:18:09 +0200
Subject: netfilter: nf_tables: keep chain counters away from hot path

These chain counters are only used by the iptables-compat tool, that
allow users to use the x_tables extensions from the existing nf_tables
framework. This patch makes nf_tables by ~5% for the general usecase,
ie. native nft users, where no chain counters are used at all.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h |  2 ++
 net/netfilter/nf_tables_api.c          | 11 +++--------
 net/netfilter/nf_tables_core.c         | 26 ++++++++++++++++++--------
 3 files changed, 23 insertions(+), 16 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 8f690effec37..424684c33771 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -49,6 +49,8 @@ struct nft_payload_set {
 };
 
 extern const struct nft_expr_ops nft_payload_fast_ops;
+
+extern struct static_key_false nft_counters_enabled;
 extern struct static_key_false nft_trace_enabled;
 
 #endif /* _NET_NF_TABLES_CORE_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7843efa33c59..7fbf0070aba1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1240,6 +1240,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
 
 		module_put(basechain->type->owner);
 		free_percpu(basechain->stats);
+		if (basechain->stats)
+			static_branch_dec(&nft_counters_enabled);
 		if (basechain->ops[0].dev != NULL)
 			dev_put(basechain->ops[0].dev);
 		kfree(basechain);
@@ -1504,14 +1506,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 				return PTR_ERR(stats);
 			}
 			basechain->stats = stats;
-		} else {
-			stats = netdev_alloc_pcpu_stats(struct nft_stats);
-			if (stats == NULL) {
-				nft_chain_release_hook(&hook);
-				kfree(basechain);
-				return -ENOMEM;
-			}
-			rcu_assign_pointer(basechain->stats, stats);
+			static_branch_inc(&nft_counters_enabled);
 		}
 
 		hookfn = hook.type->hooks[hook.num];
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 65dbeadcb118..c5bab08b0d73 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
 	return true;
 }
 
+DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
+
+static noinline void nft_update_chain_stats(const struct nft_chain *chain,
+					    const struct nft_pktinfo *pkt)
+{
+	struct nft_stats *stats;
+
+	local_bh_disable();
+	stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
+	u64_stats_update_begin(&stats->syncp);
+	stats->pkts++;
+	stats->bytes += pkt->skb->len;
+	u64_stats_update_end(&stats->syncp);
+	local_bh_enable();
+}
+
 struct nft_jumpstack {
 	const struct nft_chain	*chain;
 	const struct nft_rule	*rule;
@@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
 	struct nft_regs regs;
 	unsigned int stackptr = 0;
 	struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
-	struct nft_stats *stats;
 	int rulenum;
 	unsigned int gencursor = nft_genmask_cur(net);
 	struct nft_traceinfo info;
@@ -220,13 +235,8 @@ next_rule:
 	nft_trace_packet(&info, basechain, NULL, -1,
 			 NFT_TRACETYPE_POLICY);
 
-	rcu_read_lock_bh();
-	stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
-	u64_stats_update_begin(&stats->syncp);
-	stats->pkts++;
-	stats->bytes += pkt->skb->len;
-	u64_stats_update_end(&stats->syncp);
-	rcu_read_unlock_bh();
+	if (static_branch_unlikely(&nft_counters_enabled))
+		nft_update_chain_stats(basechain, pkt);
 
 	return nft_base_chain(basechain)->policy;
 }
-- 
cgit v1.2.3


From 296d8ee37c50f139d934bdefbab85509b2e4a525 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 21 Jul 2017 12:49:30 +0200
Subject: net: add infrastructure to un-offload UDP tunnel port

This adds a new NETDEV_UDP_TUNNEL_DROP_INFO event, similar to
NETDEV_UDP_TUNNEL_PUSH_INFO, to signal to un-offload ports.

This also adds udp_tunnel_drop_rx_port(), which calls
ndo_udp_tunnel_del.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/net/udp_tunnel.h  |  8 ++++++++
 net/ipv4/udp_tunnel.c     | 18 ++++++++++++++++++
 3 files changed, 27 insertions(+)

(limited to 'include/net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 614642eb7eb7..3a3cdc1b1f31 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2317,6 +2317,7 @@ struct netdev_lag_lower_state_info {
 #define NETDEV_PRECHANGEUPPER	0x001A
 #define NETDEV_CHANGELOWERSTATE	0x001B
 #define NETDEV_UDP_TUNNEL_PUSH_INFO	0x001C
+#define NETDEV_UDP_TUNNEL_DROP_INFO	0x001D
 #define NETDEV_CHANGE_TX_QUEUE_LEN	0x001E
 
 int register_netdevice_notifier(struct notifier_block *nb);
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 02c5be037451..10cce0dd4450 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -115,6 +115,8 @@ struct udp_tunnel_info {
 /* Notify network devices of offloadable types */
 void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
 			     unsigned short type);
+void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
+			     unsigned short type);
 void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type);
 void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type);
 
@@ -124,6 +126,12 @@ static inline void udp_tunnel_get_rx_info(struct net_device *dev)
 	call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev);
 }
 
+static inline void udp_tunnel_drop_rx_info(struct net_device *dev)
+{
+	ASSERT_RTNL();
+	call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev);
+}
+
 /* Transmit the skb using UDP encapsulation. */
 void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
 			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 0d3f14cdc524..6539ff15e9a3 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -94,6 +94,24 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
 }
 EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
 
+void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
+			     unsigned short type)
+{
+	struct sock *sk = sock->sk;
+	struct udp_tunnel_info ti;
+
+	if (!dev->netdev_ops->ndo_udp_tunnel_del ||
+	    !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+		return;
+
+	ti.type = type;
+	ti.sa_family = sk->sk_family;
+	ti.port = inet_sk(sk)->inet_sport;
+
+	dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port);
+
 /* Notify netdevs that UDP port started listening */
 void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
 {
-- 
cgit v1.2.3


From 787310859d8d1a72545db2343fb3ac8f765b0f35 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 23 Jul 2017 09:34:32 +0800
Subject: sctp: remove the typedef sctp_sackhdr_t

This patch is to remove the typedef sctp_sackhdr_t, and replace
with struct sctp_sackhdr in the places where it's using this
typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h       | 6 +++---
 include/net/sctp/command.h | 4 ++--
 net/sctp/sm_statefuns.c    | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 8df6ac53f05b..a2e43129d11a 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -373,17 +373,17 @@ union sctp_sack_variable {
 	__be32 dup;
 };
 
-typedef struct sctp_sackhdr {
+struct sctp_sackhdr {
 	__be32 cum_tsn_ack;
 	__be32 a_rwnd;
 	__be16 num_gap_ack_blocks;
 	__be16 num_dup_tsns;
 	union sctp_sack_variable variable[0];
-} sctp_sackhdr_t;
+};
 
 typedef struct sctp_sack_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_sackhdr_t sack_hdr;
+	struct sctp_sackhdr sack_hdr;
 } sctp_sack_chunk_t;
 
 
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index d4679e7a5ed5..1d5f6ff3f440 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -135,7 +135,7 @@ typedef union {
 	struct sctp_init_chunk *init;
 	struct sctp_ulpevent *ulpevent;
 	struct sctp_packet *packet;
-	sctp_sackhdr_t *sackh;
+	struct sctp_sackhdr *sackh;
 	struct sctp_datamsg *msg;
 } sctp_arg_t;
 
@@ -176,7 +176,7 @@ SCTP_ARG_CONSTRUCTOR(BA,	struct sctp_bind_addr *, bp)
 SCTP_ARG_CONSTRUCTOR(PEER_INIT,	struct sctp_init_chunk *, init)
 SCTP_ARG_CONSTRUCTOR(ULPEVENT,  struct sctp_ulpevent *, ulpevent)
 SCTP_ARG_CONSTRUCTOR(PACKET,	struct sctp_packet *, packet)
-SCTP_ARG_CONSTRUCTOR(SACKH,	sctp_sackhdr_t *, sackh)
+SCTP_ARG_CONSTRUCTOR(SACKH,	struct sctp_sackhdr *, sackh)
 SCTP_ARG_CONSTRUCTOR(DATAMSG,	struct sctp_datamsg *, msg)
 
 static inline sctp_arg_t SCTP_FORCE(void)
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7f852392f56a..c09dfe6ebac2 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3187,7 +3187,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_sackhdr_t *sackh;
+	struct sctp_sackhdr *sackh;
 	__u32 ctsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-- 
cgit v1.2.3


From e42e24c3cc072088756d84ef07b492ac2a3ae2e5 Mon Sep 17 00:00:00 2001
From: Matvejchikov Ilya <matvejchikov@gmail.com>
Date: Mon, 24 Jul 2017 16:02:12 +0400
Subject: tcp: remove redundant argument from tcp_rcv_established()

The last (4th) argument of tcp_rcv_established() is redundant as it
always equals to skb->len and the skb itself is always passed as 2th
agrument. There is no reason to have it.

Signed-off-by: Ilya V. Matveychikov <matvejchikov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 2 +-
 net/ipv4/tcp_input.c | 3 ++-
 net/ipv4/tcp_ipv4.c  | 2 +-
 net/ipv4/tcp_probe.c | 5 +++--
 net/ipv6/tcp_ipv6.c  | 2 +-
 5 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4f056ea79df2..12d68335acd4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -362,7 +362,7 @@ void tcp_delack_timer_handler(struct sock *sk);
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th, unsigned int len);
+			 const struct tcphdr *th);
 void tcp_rcv_space_adjust(struct sock *sk);
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
 void tcp_twsk_destructor(struct sock *sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2920e0cb09f8..adc3f3e9468c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5358,8 +5358,9 @@ discard:
  *	tcp_data_queue when everything is OK.
  */
 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th, unsigned int len)
+			 const struct tcphdr *th)
 {
+	unsigned int len = skb->len;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tcp_mstamp_refresh(tp);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a20e7f03d5f7..3a19ea28339f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1458,7 +1458,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 				sk->sk_rx_dst = NULL;
 			}
 		}
-		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+		tcp_rcv_established(sk, skb, tcp_hdr(skb));
 		return 0;
 	}
 
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index f6c50af24a64..697f4c67b2e3 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -105,8 +105,9 @@ static inline int tcp_probe_avail(void)
  * Note: arguments must match tcp_rcv_established()!
  */
 static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-				 const struct tcphdr *th, unsigned int len)
+				 const struct tcphdr *th)
 {
+	unsigned int len = skb->len;
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_sock *inet = inet_sk(sk);
 
@@ -145,7 +146,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				BUG();
 			}
 
-			p->length = skb->len;
+			p->length = len;
 			p->snd_nxt = tp->snd_nxt;
 			p->snd_una = tp->snd_una;
 			p->snd_cwnd = tp->snd_cwnd;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2521690d62d6..90a32576c3d0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1296,7 +1296,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 			}
 		}
 
-		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+		tcp_rcv_established(sk, skb, tcp_hdr(skb));
 		if (opt_skb)
 			goto ipv6_pktoptions;
 		return 0;
-- 
cgit v1.2.3


From 64c83d837329531252a1a0f0dfdd4fd607e1d8e9 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Sun, 30 Jul 2017 13:24:49 -0400
Subject: net netlink: Add new type NLA_BITFIELD32

Generic bitflags attribute content sent to the kernel by user.
With this netlink attr type the user can either set or unset a
flag in the kernel.

The value is a bitmap that defines the bit values being set
The selector is a bitmask that defines which value bit is to be
considered.

A check is made to ensure the rules that a kernel subsystem always
conforms to bitflags the kernel already knows about. i.e
if the user tries to set a bit flag that is not understood then
the _it will be rejected_.

In the most basic form, the user specifies the attribute policy as:
[ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags },

where myvalidflags is the bit mask of the flags the kernel understands.

If the user _does not_ provide myvalidflags then the attribute will
also be rejected.

Examples:
value = 0x0, and selector = 0x1
implies we are selecting bit 1 and we want to set its value to 0.

value = 0x2, and selector = 0x2
implies we are selecting bit 2 and we want to set its value to 1.

Suggested-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h        | 16 ++++++++++++++++
 include/uapi/linux/netlink.h | 17 +++++++++++++++++
 lib/nlattr.c                 | 30 ++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index ef8e6c3a80a6..82dd298b40c7 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -178,6 +178,7 @@ enum {
 	NLA_S16,
 	NLA_S32,
 	NLA_S64,
+	NLA_BITFIELD32,
 	__NLA_TYPE_MAX,
 };
 
@@ -206,6 +207,7 @@ enum {
  *    NLA_MSECS            Leaving the length field zero will verify the
  *                         given type fits, using it verifies minimum length
  *                         just like "All other"
+ *    NLA_BITFIELD32      A 32-bit bitmap/bitselector attribute
  *    All other            Minimum length of attribute payload
  *
  * Example:
@@ -213,11 +215,13 @@ enum {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
  *	[ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
  *	[ATTR_BAZ] = { .len = sizeof(struct mystruct) },
+ *	[ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags },
  * };
  */
 struct nla_policy {
 	u16		type;
 	u16		len;
+	void            *validation_data;
 };
 
 /**
@@ -1202,6 +1206,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla)
 	return tmp;
 }
 
+/**
+ * nla_get_bitfield32 - return payload of 32 bitfield attribute
+ * @nla: nla_bitfield32 attribute
+ */
+static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla)
+{
+	struct nla_bitfield32 tmp;
+
+	nla_memcpy(&tmp, nla, sizeof(tmp));
+	return tmp;
+}
+
 /**
  * nla_memdup - duplicate attribute memory (kmemdup)
  * @src: netlink attribute to duplicate from
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f86127a46cfc..f4fc9c9e123d 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -226,5 +226,22 @@ struct nlattr {
 #define NLA_ALIGN(len)		(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
 #define NLA_HDRLEN		((int) NLA_ALIGN(sizeof(struct nlattr)))
 
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set
+ * The selector is a bitmask that defines which value is legit
+ *
+ * Examples:
+ *  value = 0x0, and selector = 0x1
+ *  implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ *  value = 0x2, and selector = 0x2
+ *  implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+	__u32 value;
+	__u32 selector;
+};
 
 #endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/lib/nlattr.c b/lib/nlattr.c
index fb52435be42d..ee79b7a3c6b0 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -27,6 +27,30 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
 	[NLA_S64]	= sizeof(s64),
 };
 
+static int validate_nla_bitfield32(const struct nlattr *nla,
+				   u32 *valid_flags_allowed)
+{
+	const struct nla_bitfield32 *bf = nla_data(nla);
+	u32 *valid_flags_mask = valid_flags_allowed;
+
+	if (!valid_flags_allowed)
+		return -EINVAL;
+
+	/*disallow invalid bit selector */
+	if (bf->selector & ~*valid_flags_mask)
+		return -EINVAL;
+
+	/*disallow invalid bit values */
+	if (bf->value & ~*valid_flags_mask)
+		return -EINVAL;
+
+	/*disallow valid bit values that are not selected*/
+	if (bf->value & ~bf->selector)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int validate_nla(const struct nlattr *nla, int maxtype,
 			const struct nla_policy *policy)
 {
@@ -46,6 +70,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 			return -ERANGE;
 		break;
 
+	case NLA_BITFIELD32:
+		if (attrlen != sizeof(struct nla_bitfield32))
+			return -ERANGE;
+
+		return validate_nla_bitfield32(nla, pt->validation_data);
+
 	case NLA_NUL_STRING:
 		if (pt->len)
 			minlen = min_t(int, attrlen, pt->len + 1);
-- 
cgit v1.2.3


From ac7b848390036dadd4351899d2a23748075916bd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 26 Jul 2017 00:02:31 +0200
Subject: netfilter: expect: add and use nf_ct_expect_iterate helpers

We have several spots that open-code a expect walk, add a helper
that is similar to nf_ct_iterate_destroy/nf_ct_iterate_cleanup.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_expect.h |  5 +++
 net/netfilter/nf_conntrack_expect.c         | 54 +++++++++++++++++++++++++
 net/netfilter/nf_conntrack_helper.c         | 34 +++++++---------
 net/netfilter/nf_conntrack_netlink.c        | 63 ++++++++++-------------------
 4 files changed, 95 insertions(+), 61 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 2ba54feaccd8..818def011110 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -107,6 +107,11 @@ void nf_ct_remove_expectations(struct nf_conn *ct);
 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp);
 bool nf_ct_remove_expect(struct nf_conntrack_expect *exp);
 
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), void *data);
+void nf_ct_expect_iterate_net(struct net *net,
+			      bool (*iter)(struct nf_conntrack_expect *e, void *data),
+                              void *data, u32 portid, int report);
+
 /* Allocate space for an expectation: this is mandatory before calling
    nf_ct_expect_related.  You will have to call put afterwards. */
 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 2c63808bea96..dad2c0c22ad5 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -474,6 +474,60 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
 
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
+				  void *data)
+{
+	struct nf_conntrack_expect *exp;
+	const struct hlist_node *next;
+	unsigned int i;
+
+	spin_lock_bh(&nf_conntrack_expect_lock);
+
+	for (i = 0; i < nf_ct_expect_hsize; i++) {
+		hlist_for_each_entry_safe(exp, next,
+					  &nf_ct_expect_hash[i],
+					  hnode) {
+			if (iter(exp, data) && del_timer(&exp->timeout)) {
+				nf_ct_unlink_expect(exp);
+				nf_ct_expect_put(exp);
+			}
+		}
+	}
+
+	spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
+
+void nf_ct_expect_iterate_net(struct net *net,
+			      bool (*iter)(struct nf_conntrack_expect *e, void *data),
+			      void *data,
+			      u32 portid, int report)
+{
+	struct nf_conntrack_expect *exp;
+	const struct hlist_node *next;
+	unsigned int i;
+
+	spin_lock_bh(&nf_conntrack_expect_lock);
+
+	for (i = 0; i < nf_ct_expect_hsize; i++) {
+		hlist_for_each_entry_safe(exp, next,
+					  &nf_ct_expect_hash[i],
+					  hnode) {
+
+			if (!net_eq(nf_ct_exp_net(exp), net))
+				continue;
+
+			if (iter(exp, data) && del_timer(&exp->timeout)) {
+				nf_ct_unlink_expect_report(exp, portid, report);
+				nf_ct_expect_put(exp);
+			}
+		}
+	}
+
+	spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
+
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 struct ct_expect_iter_state {
 	struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9129bb3b5153..551a1eddf0fa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -437,12 +437,22 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
 
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
 {
-	struct nf_conntrack_expect *exp;
-	const struct hlist_node *next;
-	unsigned int i;
+	struct nf_conn_help *help = nfct_help(exp->master);
+	const struct nf_conntrack_helper *me = data;
+	const struct nf_conntrack_helper *this;
+
+	if (exp->helper == me)
+		return true;
 
+	this = rcu_dereference_protected(help->helper,
+					 lockdep_is_held(&nf_conntrack_expect_lock));
+	return this == me;
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
 	mutex_lock(&nf_ct_helper_mutex);
 	hlist_del_rcu(&me->hnode);
 	nf_ct_helper_count--;
@@ -453,21 +463,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	 */
 	synchronize_rcu();
 
-	/* Get rid of expectations */
-	spin_lock_bh(&nf_conntrack_expect_lock);
-	for (i = 0; i < nf_ct_expect_hsize; i++) {
-		hlist_for_each_entry_safe(exp, next,
-					  &nf_ct_expect_hash[i], hnode) {
-			struct nf_conn_help *help = nfct_help(exp->master);
-			if ((rcu_dereference_protected(
-					help->helper,
-					lockdep_is_held(&nf_conntrack_expect_lock)
-					) == me || exp->helper == me))
-				nf_ct_remove_expect(exp);
-		}
-	}
-	spin_unlock_bh(&nf_conntrack_expect_lock);
-
+	nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
 	nf_ct_iterate_destroy(unhelp, me);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4dba71de4de7..4922c8aefb2a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2898,6 +2898,21 @@ out:
 	return err == -EAGAIN ? -ENOBUFS : err;
 }
 
+static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
+{
+	const struct nf_conn_help *m_help;
+	const char *name = data;
+
+	m_help = nfct_help(exp->master);
+
+	return strcmp(m_help->helper->name, name) == 0;
+}
+
+static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
+{
+	return true;
+}
+
 static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
 				const struct nlattr * const cda[],
@@ -2906,10 +2921,8 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	struct hlist_node *next;
 	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_zone zone;
-	unsigned int i;
 	int err;
 
 	if (cda[CTA_EXPECT_TUPLE]) {
@@ -2949,49 +2962,15 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 		nf_ct_expect_put(exp);
 	} else if (cda[CTA_EXPECT_HELP_NAME]) {
 		char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
-		struct nf_conn_help *m_help;
 
-		/* delete all expectations for this helper */
-		spin_lock_bh(&nf_conntrack_expect_lock);
-		for (i = 0; i < nf_ct_expect_hsize; i++) {
-			hlist_for_each_entry_safe(exp, next,
-						  &nf_ct_expect_hash[i],
-						  hnode) {
-
-				if (!net_eq(nf_ct_exp_net(exp), net))
-					continue;
-
-				m_help = nfct_help(exp->master);
-				if (!strcmp(m_help->helper->name, name) &&
-				    del_timer(&exp->timeout)) {
-					nf_ct_unlink_expect_report(exp,
-							NETLINK_CB(skb).portid,
-							nlmsg_report(nlh));
-					nf_ct_expect_put(exp);
-				}
-			}
-		}
-		spin_unlock_bh(&nf_conntrack_expect_lock);
+		nf_ct_expect_iterate_net(net, expect_iter_name, name,
+					 NETLINK_CB(skb).portid,
+					 nlmsg_report(nlh));
 	} else {
 		/* This basically means we have to flush everything*/
-		spin_lock_bh(&nf_conntrack_expect_lock);
-		for (i = 0; i < nf_ct_expect_hsize; i++) {
-			hlist_for_each_entry_safe(exp, next,
-						  &nf_ct_expect_hash[i],
-						  hnode) {
-
-				if (!net_eq(nf_ct_exp_net(exp), net))
-					continue;
-
-				if (del_timer(&exp->timeout)) {
-					nf_ct_unlink_expect_report(exp,
-							NETLINK_CB(skb).portid,
-							nlmsg_report(nlh));
-					nf_ct_expect_put(exp);
-				}
-			}
-		}
-		spin_unlock_bh(&nf_conntrack_expect_lock);
+		nf_ct_expect_iterate_net(net, expect_iter_all, NULL,
+					 NETLINK_CB(skb).portid,
+					 nlmsg_report(nlh));
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 84657984c26fd0b64743a397f3a1a587fa4b575a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 26 Jul 2017 00:02:32 +0200
Subject: netfilter: add and use nf_ct_unconfirmed_destroy

This also removes __nf_ct_unconfirmed_destroy() call from
nf_ct_iterate_cleanup_net, so that function can be used only
when missing conntracks from unconfirmed list isn't a problem.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h |  3 +++
 net/netfilter/nf_conntrack_core.c    | 15 +++++++++++----
 net/netfilter/nfnetlink_cttimeout.c  |  1 +
 3 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 48407569585d..6e6f678aaac7 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -224,6 +224,9 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			       enum ip_conntrack_dir dir,
 			       u32 seq);
 
+/* Set all unconfirmed conntrack as dying */
+void nf_ct_unconfirmed_destroy(struct net *);
+
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
 void nf_ct_iterate_cleanup_net(struct net *net,
 			       int (*iter)(struct nf_conn *i, void *data),
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c6f1cf0bff56..80ab4e937765 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1686,6 +1686,17 @@ __nf_ct_unconfirmed_destroy(struct net *net)
 	}
 }
 
+void nf_ct_unconfirmed_destroy(struct net *net)
+{
+	might_sleep();
+
+	if (atomic_read(&net->ct.count) > 0) {
+		__nf_ct_unconfirmed_destroy(net);
+		synchronize_net();
+	}
+}
+EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
+
 void nf_ct_iterate_cleanup_net(struct net *net,
 			       int (*iter)(struct nf_conn *i, void *data),
 			       void *data, u32 portid, int report)
@@ -1697,14 +1708,10 @@ void nf_ct_iterate_cleanup_net(struct net *net,
 	if (atomic_read(&net->ct.count) == 0)
 		return;
 
-	__nf_ct_unconfirmed_destroy(net);
-
 	d.iter = iter;
 	d.data = data;
 	d.net = net;
 
-	synchronize_net();
-
 	nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
 }
 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 7ce9e86d374c..f4fb6d4dd0b9 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -570,6 +570,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 {
 	struct ctnl_timeout *cur, *tmp;
 
+	nf_ct_unconfirmed_destroy(net);
 	ctnl_untimeout(net, NULL);
 
 	list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
-- 
cgit v1.2.3


From 2cf0c8b3e6942ecafe6ebb1a6d0328a81641bf39 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Jul 2017 16:56:40 +0200
Subject: netlink: Introduce nla_strdup()

This is similar to strdup() for netlink string attributes.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netlink.h |  1 +
 lib/nlattr.c          | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index ef8e6c3a80a6..c8c2eb5ae55e 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -247,6 +247,7 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
 int nla_policy_len(const struct nla_policy *, int);
 struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype);
 size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize);
+char *nla_strdup(const struct nlattr *nla, gfp_t flags);
 int nla_memcpy(void *dest, const struct nlattr *src, int count);
 int nla_memcmp(const struct nlattr *nla, const void *data, size_t size);
 int nla_strcmp(const struct nlattr *nla, const char *str);
diff --git a/lib/nlattr.c b/lib/nlattr.c
index fb52435be42d..f13013f7e21a 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -271,6 +271,30 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
 }
 EXPORT_SYMBOL(nla_strlcpy);
 
+/**
+ * nla_strdup - Copy string attribute payload into a newly allocated buffer
+ * @nla: attribute to copy the string from
+ * @flags: the type of memory to allocate (see kmalloc).
+ *
+ * Returns a pointer to the allocated buffer or NULL on error.
+ */
+char *nla_strdup(const struct nlattr *nla, gfp_t flags)
+{
+	size_t srclen = nla_len(nla);
+	char *src = nla_data(nla), *dst;
+
+	if (srclen > 0 && src[srclen - 1] == '\0')
+		srclen--;
+
+	dst = kmalloc(srclen + 1, flags);
+	if (dst != NULL) {
+		memcpy(dst, src, srclen);
+		dst[srclen] = '\0';
+	}
+	return dst;
+}
+EXPORT_SYMBOL(nla_strdup);
+
 /**
  * nla_memcpy - Copy a netlink attribute into another memory area
  * @dest: where to copy to memcpy
-- 
cgit v1.2.3


From e46abbcc05aa8a16b0e7f5c94e86d11af9aa2770 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Jul 2017 16:56:41 +0200
Subject: netfilter: nf_tables: Allow table names of up to 255 chars

Allocate all table names dynamically to allow for arbitrary lengths but
introduce NFT_NAME_MAXLEN as an upper sanity boundary. It's value was
chosen to allow using a domain name as per RFC 1035.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  2 +-
 include/uapi/linux/netfilter/nf_tables.h |  3 +-
 net/netfilter/nf_tables_api.c            | 49 +++++++++++++++++++++++---------
 net/netfilter/nf_tables_trace.c          |  2 +-
 4 files changed, 40 insertions(+), 16 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index bd5be0d691d5..05ecf78ec078 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -957,7 +957,7 @@ struct nft_table {
 	u32				use;
 	u16				flags:14,
 					genmask:2;
-	char				name[NFT_TABLE_MAXNAMELEN];
+	char				*name;
 };
 
 enum nft_af_flags {
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 6f0a950e21c3..0b94e572ef16 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,7 +1,8 @@
 #ifndef _LINUX_NF_TABLES_H
 #define _LINUX_NF_TABLES_H
 
-#define NFT_TABLE_MAXNAMELEN	32
+#define NFT_NAME_MAXLEN		256
+#define NFT_TABLE_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_CHAIN_MAXNAMELEN	32
 #define NFT_SET_MAXNAMELEN	32
 #define NFT_OBJ_MAXNAMELEN	32
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b77ad0813564..c2e392d5e512 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	if (table == NULL)
 		goto err2;
 
-	nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
+	table->name = nla_strdup(name, GFP_KERNEL);
+	if (table->name == NULL)
+		goto err3;
+
 	INIT_LIST_HEAD(&table->chains);
 	INIT_LIST_HEAD(&table->sets);
 	INIT_LIST_HEAD(&table->objects);
@@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
 	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
 	if (err < 0)
-		goto err3;
+		goto err4;
 
 	list_add_tail_rcu(&table->list, &afi->tables);
 	return 0;
+err4:
+	kfree(table->name);
 err3:
 	kfree(table);
 err2:
@@ -865,6 +870,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
 {
 	BUG_ON(ctx->table->use > 0);
 
+	kfree(ctx->table->name);
 	kfree(ctx->table);
 	module_put(ctx->afi->owner);
 }
@@ -1972,7 +1978,7 @@ err:
 }
 
 struct nft_rule_dump_ctx {
-	char table[NFT_TABLE_MAXNAMELEN];
+	char *table;
 	char chain[NFT_CHAIN_MAXNAMELEN];
 };
 
@@ -1997,7 +2003,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 			continue;
 
 		list_for_each_entry_rcu(table, &afi->tables, list) {
-			if (ctx && ctx->table[0] &&
+			if (ctx && ctx->table &&
 			    strcmp(ctx->table, table->name) != 0)
 				continue;
 
@@ -2037,7 +2043,12 @@ done:
 
 static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 {
-	kfree(cb->data);
+	struct nft_rule_dump_ctx *ctx = cb->data;
+
+	if (ctx) {
+		kfree(ctx->table);
+		kfree(ctx);
+	}
 	return 0;
 }
 
@@ -2069,9 +2080,14 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 			if (!ctx)
 				return -ENOMEM;
 
-			if (nla[NFTA_RULE_TABLE])
-				nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
-					    sizeof(ctx->table));
+			if (nla[NFTA_RULE_TABLE]) {
+				ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+							GFP_KERNEL);
+				if (!ctx->table) {
+					kfree(ctx);
+					return -ENOMEM;
+				}
+			}
 			if (nla[NFTA_RULE_CHAIN])
 				nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
 					    sizeof(ctx->chain));
@@ -4410,7 +4426,7 @@ nla_put_failure:
 }
 
 struct nft_obj_filter {
-	char		table[NFT_OBJ_MAXNAMELEN];
+	char		*table;
 	u32		type;
 };
 
@@ -4475,7 +4491,10 @@ done:
 
 static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 {
-	kfree(cb->data);
+	struct nft_obj_filter *filter = cb->data;
+
+	kfree(filter->table);
+	kfree(filter);
 
 	return 0;
 }
@@ -4489,9 +4508,13 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
 	if (!filter)
 		return ERR_PTR(-ENOMEM);
 
-	if (nla[NFTA_OBJ_TABLE])
-		nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
-			    NFT_TABLE_MAXNAMELEN);
+	if (nla[NFTA_OBJ_TABLE]) {
+		filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL);
+		if (!filter->table) {
+			kfree(filter);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
 	if (nla[NFTA_OBJ_TYPE])
 		filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
 
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 0c3a0049e4aa..62787d985e9d 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -175,7 +175,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
 		return;
 
 	size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
-		nla_total_size(NFT_TABLE_MAXNAMELEN) +
+		nla_total_size(strlen(info->chain->table->name)) +
 		nla_total_size(NFT_CHAIN_MAXNAMELEN) +
 		nla_total_size_64bit(sizeof(__be64)) +	/* rule handle */
 		nla_total_size(sizeof(__be32)) +	/* trace type */
-- 
cgit v1.2.3


From b7263e071aba736cea9e71cdf2e76dfa7aebd039 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Jul 2017 16:56:42 +0200
Subject: netfilter: nf_tables: Allow chain name of up to 255 chars

Same conversion as for table names, use NFT_NAME_MAXLEN as upper
boundary as well.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  4 ++--
 include/uapi/linux/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c            | 34 ++++++++++++++++++++++++--------
 net/netfilter/nf_tables_trace.c          | 27 +++++++++++++++++++++++--
 4 files changed, 54 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 05ecf78ec078..be1610162ee0 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -859,7 +859,7 @@ struct nft_chain {
 	u16				level;
 	u8				flags:6,
 					genmask:2;
-	char				name[NFT_CHAIN_MAXNAMELEN];
+	char				*name;
 };
 
 enum nft_chain_type {
@@ -1272,7 +1272,7 @@ struct nft_trans_set {
 
 struct nft_trans_chain {
 	bool				update;
-	char				name[NFT_CHAIN_MAXNAMELEN];
+	char				*name;
 	struct nft_stats __percpu	*stats;
 	u8				policy;
 };
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 0b94e572ef16..d9c03a8608ee 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -3,7 +3,7 @@
 
 #define NFT_NAME_MAXLEN		256
 #define NFT_TABLE_MAXNAMELEN	NFT_NAME_MAXLEN
-#define NFT_CHAIN_MAXNAMELEN	32
+#define NFT_CHAIN_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_SET_MAXNAMELEN	32
 #define NFT_OBJ_MAXNAMELEN	32
 #define NFT_USERDATA_MAXLEN	256
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c2e392d5e512..747499039709 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1250,8 +1250,10 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
 			static_branch_dec(&nft_counters_enabled);
 		if (basechain->ops[0].dev != NULL)
 			dev_put(basechain->ops[0].dev);
+		kfree(chain->name);
 		kfree(basechain);
 	} else {
+		kfree(chain->name);
 		kfree(chain);
 	}
 }
@@ -1476,8 +1478,13 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 			nft_trans_chain_policy(trans) = -1;
 
 		if (nla[NFTA_CHAIN_HANDLE] && name) {
-			nla_strlcpy(nft_trans_chain_name(trans), name,
-				    NFT_CHAIN_MAXNAMELEN);
+			nft_trans_chain_name(trans) =
+				nla_strdup(name, GFP_KERNEL);
+			if (!nft_trans_chain_name(trans)) {
+				kfree(trans);
+				free_percpu(stats);
+				return -ENOMEM;
+			}
 		}
 		list_add_tail(&trans->list, &net->nft.commit_list);
 		return 0;
@@ -1544,7 +1551,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	INIT_LIST_HEAD(&chain->rules);
 	chain->handle = nf_tables_alloc_handle(table);
 	chain->table = table;
-	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+	chain->name = nla_strdup(name, GFP_KERNEL);
+	if (!chain->name) {
+		err = -ENOMEM;
+		goto err1;
+	}
 
 	err = nf_tables_register_hooks(net, table, chain, afi->nops);
 	if (err < 0)
@@ -1979,7 +1990,7 @@ err:
 
 struct nft_rule_dump_ctx {
 	char *table;
-	char chain[NFT_CHAIN_MAXNAMELEN];
+	char *chain;
 };
 
 static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -2047,6 +2058,7 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 
 	if (ctx) {
 		kfree(ctx->table);
+		kfree(ctx->chain);
 		kfree(ctx);
 	}
 	return 0;
@@ -2088,9 +2100,15 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 					return -ENOMEM;
 				}
 			}
-			if (nla[NFTA_RULE_CHAIN])
-				nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
-					    sizeof(ctx->chain));
+			if (nla[NFTA_RULE_CHAIN]) {
+				ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+							GFP_KERNEL);
+				if (!ctx->chain) {
+					kfree(ctx->table);
+					kfree(ctx);
+					return -ENOMEM;
+				}
+			}
 			c.data = ctx;
 		}
 
@@ -4863,7 +4881,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
 {
 	struct nft_base_chain *basechain;
 
-	if (nft_trans_chain_name(trans)[0])
+	if (nft_trans_chain_name(trans))
 		strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
 
 	if (!nft_is_base_chain(trans->ctx.chain))
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 62787d985e9d..e1dc527a493b 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -162,6 +162,27 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
 			    NFTA_TRACE_PAD);
 }
 
+static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
+{
+	switch (info->type) {
+	case NFT_TRACETYPE_RETURN:
+	case NFT_TRACETYPE_RULE:
+		break;
+	default:
+		return false;
+	}
+
+	switch (info->verdict->code) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 void nft_trace_notify(struct nft_traceinfo *info)
 {
 	const struct nft_pktinfo *pkt = info->pkt;
@@ -176,12 +197,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
 
 	size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
 		nla_total_size(strlen(info->chain->table->name)) +
-		nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+		nla_total_size(strlen(info->chain->name)) +
 		nla_total_size_64bit(sizeof(__be64)) +	/* rule handle */
 		nla_total_size(sizeof(__be32)) +	/* trace type */
 		nla_total_size(0) +			/* VERDICT, nested */
 			nla_total_size(sizeof(u32)) +	/* verdict code */
-			nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
 		nla_total_size(sizeof(u32)) +		/* id */
 		nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
 		nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
@@ -194,6 +214,9 @@ void nft_trace_notify(struct nft_traceinfo *info)
 		nla_total_size(sizeof(u32)) +		/* nfproto */
 		nla_total_size(sizeof(u32));		/* policy */
 
+	if (nft_trace_have_verdict_chain(info))
+		size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */
+
 	skb = nlmsg_new(size, GFP_ATOMIC);
 	if (!skb)
 		return;
-- 
cgit v1.2.3


From 387454901bd62022ac1b04e15bd8d4fcc60bbed4 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Jul 2017 16:56:43 +0200
Subject: netfilter: nf_tables: Allow set names of up to 255 chars

Same conversion as for table names, use NFT_NAME_MAXLEN as upper
boundary as well.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  2 +-
 include/uapi/linux/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c            | 18 ++++++++++++++----
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index be1610162ee0..66ba62fa7d90 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -396,7 +396,7 @@ void nft_unregister_set(struct nft_set_type *type);
 struct nft_set {
 	struct list_head		list;
 	struct list_head		bindings;
-	char				name[NFT_SET_MAXNAMELEN];
+	char				*name;
 	u32				ktype;
 	u32				dtype;
 	u32				objtype;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index d9c03a8608ee..b5e73e80b7b6 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -4,7 +4,7 @@
 #define NFT_NAME_MAXLEN		256
 #define NFT_TABLE_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_CHAIN_MAXNAMELEN	NFT_NAME_MAXLEN
-#define NFT_SET_MAXNAMELEN	32
+#define NFT_SET_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_OBJ_MAXNAMELEN	32
 #define NFT_USERDATA_MAXLEN	256
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 747499039709..e6a07f27b1a3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2650,7 +2650,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 	unsigned long *inuse;
 	unsigned int n = 0, min = 0;
 
-	p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
+	p = strchr(name, '%');
 	if (p != NULL) {
 		if (p[1] != 'd' || strchr(p + 2, '%'))
 			return -EINVAL;
@@ -2681,7 +2681,10 @@ cont:
 		free_page((unsigned long)inuse);
 	}
 
-	snprintf(set->name, sizeof(set->name), name, min + n);
+	set->name = kasprintf(GFP_KERNEL, name, min + n);
+	if (!set->name)
+		return -ENOMEM;
+
 	list_for_each_entry(i, &ctx->table->sets, list) {
 		if (!nft_is_active_next(ctx->net, i))
 			continue;
@@ -2958,7 +2961,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_ctx ctx;
-	char name[NFT_SET_MAXNAMELEN];
+	char *name;
 	unsigned int size;
 	bool create;
 	u64 timeout;
@@ -3104,8 +3107,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 		goto err1;
 	}
 
-	nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+	if (!name) {
+		err = -ENOMEM;
+		goto err2;
+	}
+
 	err = nf_tables_set_alloc_name(&ctx, set, name);
+	kfree(name);
 	if (err < 0)
 		goto err2;
 
@@ -3155,6 +3164,7 @@ static void nft_set_destroy(struct nft_set *set)
 {
 	set->ops->destroy(set);
 	module_put(set->ops->type->owner);
+	kfree(set->name);
 	kvfree(set);
 }
 
-- 
cgit v1.2.3


From 615095752100748e221028fc96163c2b78185ae4 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Thu, 27 Jul 2017 16:56:44 +0200
Subject: netfilter: nf_tables: Allow object names of up to 255 chars

Same conversion as for table names, use NFT_NAME_MAXLEN as upper
boundary as well.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  2 +-
 include/uapi/linux/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c            | 11 +++++++++--
 3 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 66ba62fa7d90..f9795fe394f3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1016,7 +1016,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
  */
 struct nft_object {
 	struct list_head		list;
-	char				name[NFT_OBJ_MAXNAMELEN];
+	char				*name;
 	struct nft_table		*table;
 	u32				genmask:2,
 					use:30;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index b5e73e80b7b6..be25cf69295b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -5,7 +5,7 @@
 #define NFT_TABLE_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_CHAIN_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_SET_MAXNAMELEN	NFT_NAME_MAXLEN
-#define NFT_OBJ_MAXNAMELEN	32
+#define NFT_OBJ_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_USERDATA_MAXLEN	256
 
 /**
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e6a07f27b1a3..149785ff1c7b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4402,15 +4402,21 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 		goto err1;
 	}
 	obj->table = table;
-	nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
+	obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+	if (!obj->name) {
+		err = -ENOMEM;
+		goto err2;
+	}
 
 	err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
 	if (err < 0)
-		goto err2;
+		goto err3;
 
 	list_add_tail_rcu(&obj->list, &table->objects);
 	table->use++;
 	return 0;
+err3:
+	kfree(obj->name);
 err2:
 	if (obj->type->destroy)
 		obj->type->destroy(obj);
@@ -4626,6 +4632,7 @@ static void nft_obj_destroy(struct nft_object *obj)
 		obj->type->destroy(obj);
 
 	module_put(obj->type->owner);
+	kfree(obj->name);
 	kfree(obj);
 }
 
-- 
cgit v1.2.3


From 4d3a57f23dec59f0a2362e63540b2d01b37afe0a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 28 Jul 2017 11:22:04 +0200
Subject: netfilter: conntrack: do not enable connection tracking unless needed

Discussion during NFWS 2017 in Faro has shown that the current
conntrack behaviour is unreasonable.

Even if conntrack module is loaded on behalf of a single net namespace,
its turned on for all namespaces, which is expensive.  Commit
481fa373476 ("netfilter: conntrack: add nf_conntrack_default_on sysctl")
attempted to provide an alternative to the 'default on' behaviour by
adding a sysctl to change it.

However, as Eric points out, the sysctl only becomes available
once the module is loaded, and then its too late.

So we either have to move the sysctl to the core, or, alternatively,
change conntrack to become active only once the rule set requires this.

This does the latter, conntrack is only enabled when a rule needs it.

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 Documentation/networking/nf_conntrack-sysctl.txt | 11 ---------
 include/net/netfilter/nf_conntrack_l3proto.h     | 15 ------------
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c   | 16 ++-----------
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c   | 17 ++------------
 net/netfilter/nf_conntrack_proto.c               | 29 ------------------------
 net/netfilter/nf_conntrack_standalone.c          | 10 --------
 6 files changed, 4 insertions(+), 94 deletions(-)

(limited to 'include/net')

diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt
index 497d668288f9..433b6724797a 100644
--- a/Documentation/networking/nf_conntrack-sysctl.txt
+++ b/Documentation/networking/nf_conntrack-sysctl.txt
@@ -96,17 +96,6 @@ nf_conntrack_max - INTEGER
 	Size of connection tracking table.  Default value is
 	nf_conntrack_buckets value * 4.
 
-nf_conntrack_default_on - BOOLEAN
-	0 - don't register conntrack in new net namespaces
-	1 - register conntrack in new net namespaces (default)
-
-	This controls wheter newly created network namespaces have connection
-	tracking enabled by default.  It will be enabled automatically
-	regardless of this setting if the new net namespace requires
-	connection tracking, e.g. when NAT rules are created.
-	This setting is only visible in initial user namespace, it has no
-	effect on existing namespaces.
-
 nf_conntrack_tcp_be_liberal - BOOLEAN
 	0 - disabled (default)
 	not 0 - enabled
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 6d14b36e3a49..1b8de164d744 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -73,21 +73,6 @@ struct nf_conntrack_l3proto {
 
 extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
 
-#ifdef CONFIG_SYSCTL
-/* Protocol pernet registration. */
-int nf_ct_l3proto_pernet_register(struct net *net,
-				  struct nf_conntrack_l3proto *proto);
-#else
-static inline int nf_ct_l3proto_pernet_register(struct net *n,
-						struct nf_conntrack_l3proto *p)
-{
-	return 0;
-}
-#endif
-
-void nf_ct_l3proto_pernet_unregister(struct net *net,
-				     struct nf_conntrack_l3proto *proto);
-
 /* Protocol global registration. */
 int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto);
 void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 63e4ea0e01f8..de5f0e6ddd1b 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -398,24 +398,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
 
 static int ipv4_net_init(struct net *net)
 {
-	int ret = 0;
-
-	ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
-					    ARRAY_SIZE(builtin_l4proto4));
-	if (ret < 0)
-		return ret;
-	ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv4: pernet registration failed\n");
-		nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
-						ARRAY_SIZE(builtin_l4proto4));
-	}
-	return ret;
+	return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
+					     ARRAY_SIZE(builtin_l4proto4));
 }
 
 static void ipv4_net_exit(struct net *net)
 {
-	nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
 	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
 					ARRAY_SIZE(builtin_l4proto4));
 }
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index f2d2f4a9294b..ddef5ee9e0a8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -398,25 +398,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
 
 static int ipv6_net_init(struct net *net)
 {
-	int ret = 0;
-
-	ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
-					    ARRAY_SIZE(builtin_l4proto6));
-	if (ret < 0)
-		return ret;
-
-	ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
-		nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
-						ARRAY_SIZE(builtin_l4proto6));
-	}
-	return ret;
+	return nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
+					     ARRAY_SIZE(builtin_l4proto6));
 }
 
 static void ipv6_net_exit(struct net *net)
 {
-	nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
 	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
 					ARRAY_SIZE(builtin_l4proto6));
 }
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1dcad229c3cc..7c89dade6fd3 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -238,20 +238,6 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
 
-#ifdef CONFIG_SYSCTL
-extern unsigned int nf_conntrack_default_on;
-
-int nf_ct_l3proto_pernet_register(struct net *net,
-				  struct nf_conntrack_l3proto *proto)
-{
-	if (nf_conntrack_default_on == 0)
-		return 0;
-
-	return proto->net_ns_get ? proto->net_ns_get(net) : 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
-#endif
-
 void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 {
 	BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
@@ -270,21 +256,6 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
 
-void nf_ct_l3proto_pernet_unregister(struct net *net,
-				     struct nf_conntrack_l3proto *proto)
-{
-	/*
-	 * nf_conntrack_default_on *might* have registered hooks.
-	 * ->net_ns_put must cope with more puts() than get(), i.e.
-	 * if nf_conntrack_default_on was 0 at time of
-	 * nf_ct_l3proto_pernet_register invocation this net_ns_put()
-	 * should be a noop.
-	 */
-	if (proto->net_ns_put)
-		proto->net_ns_put(net);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
-
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
 					      struct nf_conntrack_l4proto *l4proto)
 {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index ccb5cb9043e0..5b6c675d55b1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -452,9 +452,6 @@ static int log_invalid_proto_max __read_mostly = 255;
 /* size the user *wants to set */
 static unsigned int nf_conntrack_htable_size_user __read_mostly;
 
-extern unsigned int nf_conntrack_default_on;
-unsigned int nf_conntrack_default_on __read_mostly = 1;
-
 static int
 nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -520,13 +517,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "nf_conntrack_default_on",
-		.data		= &nf_conntrack_default_on,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{ }
 };
 
-- 
cgit v1.2.3


From e7942d0633c47c791ece6afa038be9cf977226de Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 30 Jul 2017 03:57:18 +0200
Subject: tcp: remove prequeue support

prequeue is a tcp receive optimization that moves part of rx processing
from bh to process context.

This only works if the socket being processed belongs to a process that
is blocked in recv on that socket.

In practice, this doesn't happen anymore that often because nowadays
servers tend to use an event driven (epoll) model.

Even normal client applications (web browsers) commonly use many tcp
connections in parallel.

This has measureable impact only in netperf (which uses plain recv and
thus allows prequeue use) from host to locally running vm (~4%), however,
there were no changes when using netperf between two physical hosts with
ixgbe interfaces.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |   9 ----
 include/net/tcp.h        |  11 -----
 net/ipv4/tcp.c           | 105 -----------------------------------------------
 net/ipv4/tcp_input.c     |  62 ----------------------------
 net/ipv4/tcp_ipv4.c      |  61 +--------------------------
 net/ipv4/tcp_minisocks.c |   1 -
 net/ipv4/tcp_timer.c     |  12 ------
 net/ipv6/tcp_ipv6.c      |   3 +-
 8 files changed, 2 insertions(+), 262 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 542ca1ae02c4..32fb37cfb0d1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -192,15 +192,6 @@ struct tcp_sock {
 
 	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
 
-	/* Data for direct copy to user */
-	struct {
-		struct sk_buff_head	prequeue;
-		struct task_struct	*task;
-		struct msghdr		*msg;
-		int			memory;
-		int			len;
-	} ucopy;
-
 	u32	snd_wl1;	/* Sequence for window update		*/
 	u32	snd_wnd;	/* The window we expect to receive	*/
 	u32	max_window;	/* Maximal window ever seen from peer	*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 12d68335acd4..93f115cfc8f8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1244,17 +1244,6 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
 		__tcp_checksum_complete(skb);
 }
 
-/* Prequeue for VJ style copy to user, combined with checksumming. */
-
-static inline void tcp_prequeue_init(struct tcp_sock *tp)
-{
-	tp->ucopy.task = NULL;
-	tp->ucopy.len = 0;
-	tp->ucopy.memory = 0;
-	skb_queue_head_init(&tp->ucopy.prequeue);
-}
-
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
 int tcp_filter(struct sock *sk, struct sk_buff *skb);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 71ce33decd97..62018ea6f45f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -400,7 +400,6 @@ void tcp_init_sock(struct sock *sk)
 
 	tp->out_of_order_queue = RB_ROOT;
 	tcp_init_xmit_timers(sk);
-	tcp_prequeue_init(tp);
 	INIT_LIST_HEAD(&tp->tsq_node);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
@@ -1525,20 +1524,6 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		tcp_send_ack(sk);
 }
 
-static void tcp_prequeue_process(struct sock *sk)
-{
-	struct sk_buff *skb;
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
-
-	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-		sk_backlog_rcv(sk, skb);
-
-	/* Clear memory counter. */
-	tp->ucopy.memory = 0;
-}
-
 static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 {
 	struct sk_buff *skb;
@@ -1671,7 +1656,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 	int err;
 	int target;		/* Read at least this many bytes */
 	long timeo;
-	struct task_struct *user_recv = NULL;
 	struct sk_buff *skb, *last;
 	u32 urg_hole = 0;
 
@@ -1806,51 +1790,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 
 		tcp_cleanup_rbuf(sk, copied);
 
-		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
-			/* Install new reader */
-			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
-				user_recv = current;
-				tp->ucopy.task = user_recv;
-				tp->ucopy.msg = msg;
-			}
-
-			tp->ucopy.len = len;
-
-			WARN_ON(tp->copied_seq != tp->rcv_nxt &&
-				!(flags & (MSG_PEEK | MSG_TRUNC)));
-
-			/* Ugly... If prequeue is not empty, we have to
-			 * process it before releasing socket, otherwise
-			 * order will be broken at second iteration.
-			 * More elegant solution is required!!!
-			 *
-			 * Look: we have the following (pseudo)queues:
-			 *
-			 * 1. packets in flight
-			 * 2. backlog
-			 * 3. prequeue
-			 * 4. receive_queue
-			 *
-			 * Each queue can be processed only if the next ones
-			 * are empty. At this point we have empty receive_queue.
-			 * But prequeue _can_ be not empty after 2nd iteration,
-			 * when we jumped to start of loop because backlog
-			 * processing added something to receive_queue.
-			 * We cannot release_sock(), because backlog contains
-			 * packets arrived _after_ prequeued ones.
-			 *
-			 * Shortly, algorithm is clear --- to process all
-			 * the queues in order. We could make it more directly,
-			 * requeueing packets from backlog to prequeue, if
-			 * is not empty. It is more elegant, but eats cycles,
-			 * unfortunately.
-			 */
-			if (!skb_queue_empty(&tp->ucopy.prequeue))
-				goto do_prequeue;
-
-			/* __ Set realtime policy in scheduler __ */
-		}
-
 		if (copied >= target) {
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
@@ -1859,31 +1798,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 			sk_wait_data(sk, &timeo, last);
 		}
 
-		if (user_recv) {
-			int chunk;
-
-			/* __ Restore normal policy in scheduler __ */
-
-			chunk = len - tp->ucopy.len;
-			if (chunk != 0) {
-				NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
-				len -= chunk;
-				copied += chunk;
-			}
-
-			if (tp->rcv_nxt == tp->copied_seq &&
-			    !skb_queue_empty(&tp->ucopy.prequeue)) {
-do_prequeue:
-				tcp_prequeue_process(sk);
-
-				chunk = len - tp->ucopy.len;
-				if (chunk != 0) {
-					NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
-					len -= chunk;
-					copied += chunk;
-				}
-			}
-		}
 		if ((flags & MSG_PEEK) &&
 		    (peek_seq - copied - urg_hole != tp->copied_seq)) {
 			net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
@@ -1955,25 +1869,6 @@ skip_copy:
 		break;
 	} while (len > 0);
 
-	if (user_recv) {
-		if (!skb_queue_empty(&tp->ucopy.prequeue)) {
-			int chunk;
-
-			tp->ucopy.len = copied > 0 ? len : 0;
-
-			tcp_prequeue_process(sk);
-
-			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
-				NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
-				len -= chunk;
-				copied += chunk;
-			}
-		}
-
-		tp->ucopy.task = NULL;
-		tp->ucopy.len = 0;
-	}
-
 	/* According to UNIX98, msg_name/msg_namelen are ignored
 	 * on connected socket. I was just happy when found this 8) --ANK
 	 */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index adc3f3e9468c..770ce6cb3eca 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4611,22 +4611,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			goto out_of_window;
 
 		/* Ok. In sequence. In window. */
-		if (tp->ucopy.task == current &&
-		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
-		    sock_owned_by_user(sk) && !tp->urg_data) {
-			int chunk = min_t(unsigned int, skb->len,
-					  tp->ucopy.len);
-
-			__set_current_state(TASK_RUNNING);
-
-			if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
-				tp->ucopy.len -= chunk;
-				tp->copied_seq += chunk;
-				eaten = (chunk == skb->len);
-				tcp_rcv_space_adjust(sk);
-			}
-		}
-
 		if (eaten <= 0) {
 queue_and_out:
 			if (eaten < 0) {
@@ -5186,26 +5170,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
 	}
 }
 
-static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int chunk = skb->len - hlen;
-	int err;
-
-	if (skb_csum_unnecessary(skb))
-		err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
-	else
-		err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
-
-	if (!err) {
-		tp->ucopy.len -= chunk;
-		tp->copied_seq += chunk;
-		tcp_rcv_space_adjust(sk);
-	}
-
-	return err;
-}
-
 /* Accept RST for rcv_nxt - 1 after a FIN.
  * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
  * FIN is sent followed by a RST packet. The RST is sent with the same
@@ -5446,32 +5410,6 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			int eaten = 0;
 			bool fragstolen = false;
 
-			if (tp->ucopy.task == current &&
-			    tp->copied_seq == tp->rcv_nxt &&
-			    len - tcp_header_len <= tp->ucopy.len &&
-			    sock_owned_by_user(sk)) {
-				__set_current_state(TASK_RUNNING);
-
-				if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
-					/* Predicted packet is in window by definition.
-					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-					 * Hence, check seq<=rcv_wup reduces to:
-					 */
-					if (tcp_header_len ==
-					    (sizeof(struct tcphdr) +
-					     TCPOLEN_TSTAMP_ALIGNED) &&
-					    tp->rcv_nxt == tp->rcv_wup)
-						tcp_store_ts_recent(tp);
-
-					tcp_rcv_rtt_measure_ts(sk, skb);
-
-					__skb_pull(skb, tcp_header_len);
-					tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
-					NET_INC_STATS(sock_net(sk),
-							LINUX_MIB_TCPHPHITSTOUSER);
-					eaten = 1;
-				}
-			}
 			if (!eaten) {
 				if (tcp_checksum_complete(skb))
 					goto csum_error;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3a19ea28339f..a68eb4577d36 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1541,61 +1541,6 @@ void tcp_v4_early_demux(struct sk_buff *skb)
 	}
 }
 
-/* Packet is added to VJ-style prequeue for processing in process
- * context, if a reader task is waiting. Apparently, this exciting
- * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
- * failed somewhere. Latency? Burstiness? Well, at least now we will
- * see, why it failed. 8)8)				  --ANK
- *
- */
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (sysctl_tcp_low_latency || !tp->ucopy.task)
-		return false;
-
-	if (skb->len <= tcp_hdrlen(skb) &&
-	    skb_queue_len(&tp->ucopy.prequeue) == 0)
-		return false;
-
-	/* Before escaping RCU protected region, we need to take care of skb
-	 * dst. Prequeue is only enabled for established sockets.
-	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
-	 * Instead of doing full sk_rx_dst validity here, let's perform
-	 * an optimistic check.
-	 */
-	if (likely(sk->sk_rx_dst))
-		skb_dst_drop(skb);
-	else
-		skb_dst_force_safe(skb);
-
-	__skb_queue_tail(&tp->ucopy.prequeue, skb);
-	tp->ucopy.memory += skb->truesize;
-	if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
-	    tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
-		struct sk_buff *skb1;
-
-		BUG_ON(sock_owned_by_user(sk));
-		__NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
-				skb_queue_len(&tp->ucopy.prequeue));
-
-		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-			sk_backlog_rcv(sk, skb1);
-
-		tp->ucopy.memory = 0;
-	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
-		wake_up_interruptible_sync_poll(sk_sleep(sk),
-					   POLLIN | POLLRDNORM | POLLRDBAND);
-		if (!inet_csk_ack_scheduled(sk))
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  (3 * tcp_rto_min(sk)) / 4,
-						  TCP_RTO_MAX);
-	}
-	return true;
-}
-EXPORT_SYMBOL(tcp_prequeue);
-
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
@@ -1770,8 +1715,7 @@ process:
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
-			ret = tcp_v4_do_rcv(sk, skb);
+		ret = tcp_v4_do_rcv(sk, skb);
 	} else if (tcp_add_backlog(sk, skb)) {
 		goto discard_and_relse;
 	}
@@ -1936,9 +1880,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	}
 #endif
 
-	/* Clean prequeue, it must be empty really */
-	__skb_queue_purge(&tp->ucopy.prequeue);
-
 	/* Clean up a referenced TCP bind bucket. */
 	if (inet_csk(sk)->icsk_bind_hash)
 		inet_put_port(sk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ff83c1637d8..188a6f31356d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -445,7 +445,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->snd_sml = newtp->snd_una =
 		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
 
-		tcp_prequeue_init(newtp);
 		INIT_LIST_HEAD(&newtp->tsq_node);
 
 		tcp_init_wl(newtp, treq->rcv_isn);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c0feeeef962a..f753f9d2fee3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -239,7 +239,6 @@ static int tcp_write_timeout(struct sock *sk)
 /* Called with BH disabled */
 void tcp_delack_timer_handler(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	sk_mem_reclaim_partial(sk);
@@ -254,17 +253,6 @@ void tcp_delack_timer_handler(struct sock *sk)
 	}
 	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
 
-	if (!skb_queue_empty(&tp->ucopy.prequeue)) {
-		struct sk_buff *skb;
-
-		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
-
-		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-			sk_backlog_rcv(sk, skb);
-
-		tp->ucopy.memory = 0;
-	}
-
 	if (inet_csk_ack_scheduled(sk)) {
 		if (!icsk->icsk_ack.pingpong) {
 			/* Delayed ACK missed: inflate ATO. */
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2968a33cca7d..39ee8e7fc4bd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1505,8 +1505,7 @@ process:
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
-			ret = tcp_v6_do_rcv(sk, skb);
+		ret = tcp_v6_do_rcv(sk, skb);
 	} else if (tcp_add_backlog(sk, skb)) {
 		goto discard_and_relse;
 	}
-- 
cgit v1.2.3


From b6690b14386698ce2c19309abad3f17656bdfaea Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 30 Jul 2017 03:57:20 +0200
Subject: tcp: remove low_latency sysctl

Was only checked by the removed prequeue code.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 7 +------
 include/net/tcp.h                      | 1 -
 net/ipv4/sysctl_net_ipv4.c             | 3 +++
 net/ipv4/tcp_ipv4.c                    | 2 --
 4 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/net')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f485d553e65c..84c9b8cee780 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -353,12 +353,7 @@ tcp_l3mdev_accept - BOOLEAN
 	compiled with CONFIG_NET_L3_MASTER_DEV.
 
 tcp_low_latency - BOOLEAN
-	If set, the TCP stack makes decisions that prefer lower
-	latency as opposed to higher throughput.  By default, this
-	option is not set meaning that higher throughput is preferred.
-	An example of an application where this default should be
-	changed would be a Beowulf compute cluster.
-	Default: 0
+	This is a legacy option, it has no effect anymore.
 
 tcp_max_orphans - INTEGER
 	Maximal number of TCP sockets not attached to any user file handle,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 93f115cfc8f8..8507c81fb0e9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -256,7 +256,6 @@ extern int sysctl_tcp_rmem[3];
 extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_frto;
-extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9bf809726066..0d3c038d7b04 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,6 +45,9 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
 static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 
+/* obsolete */
+static int sysctl_tcp_low_latency __read_mostly;
+
 /* Update system visible IP port range */
 static void set_local_port_range(struct net *net, int range[2])
 {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a68eb4577d36..9b51663cd5a4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,8 +85,6 @@
 #include <crypto/hash.h>
 #include <linux/scatterlist.h>
 
-int sysctl_tcp_low_latency __read_mostly;
-
 #ifdef CONFIG_TCP_MD5SIG
 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
-- 
cgit v1.2.3


From 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 30 Jul 2017 03:57:21 +0200
Subject: tcp: remove header prediction

Like prequeue, I am not sure this is overly useful nowadays.

If we receive a train of packets, GRO will aggregate them if the
headers are the same (HP predates GRO by several years) so we don't
get a per-packet benefit, only a per-aggregated-packet one.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |   6 --
 include/net/tcp.h        |  23 ------
 net/ipv4/tcp.c           |   4 +-
 net/ipv4/tcp_input.c     | 192 +++--------------------------------------------
 net/ipv4/tcp_minisocks.c |   2 -
 net/ipv4/tcp_output.c    |   2 -
 6 files changed, 10 insertions(+), 219 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 32fb37cfb0d1..d7389ea36e10 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -147,12 +147,6 @@ struct tcp_sock {
 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
 	u16	gso_segs;	/* Max number of segs per GSO packet	*/
 
-/*
- *	Header prediction flags
- *	0x5?10 << 16 + snd_wnd in net byte order
- */
-	__be32	pred_flags;
-
 /*
  *	RFC793 variables by their proper names. This means you can
  *	read the code and the spec side by side (and laugh ...)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8507c81fb0e9..8f11b82b5b5a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -631,29 +631,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
 	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
 }
 
-static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
-{
-	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
-			       ntohl(TCP_FLAG_ACK) |
-			       snd_wnd);
-}
-
-static inline void tcp_fast_path_on(struct tcp_sock *tp)
-{
-	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
-}
-
-static inline void tcp_fast_path_check(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
-	    tp->rcv_wnd &&
-	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
-	    !tp->urg_data)
-		tcp_fast_path_on(tp);
-}
-
 /* Compute the actual rto_min value */
 static inline u32 tcp_rto_min(struct sock *sk)
 {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 62018ea6f45f..e022874d509f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1848,10 +1848,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 		tcp_rcv_space_adjust(sk);
 
 skip_copy:
-		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
+		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
 			tp->urg_data = 0;
-			tcp_fast_path_check(sk);
-		}
 		if (used + offset < skb->len)
 			continue;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 87efde9f5a90..bfde9d7d210e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -103,7 +103,6 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
 #define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
-#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
@@ -3367,12 +3366,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 		if (tp->snd_wnd != nwin) {
 			tp->snd_wnd = nwin;
 
-			/* Note, it is the only place, where
-			 * fast path is recovered for sending TCP.
-			 */
-			tp->pred_flags = 0;
-			tcp_fast_path_check(sk);
-
 			if (tcp_send_head(sk))
 				tcp_slow_start_after_idle_check(sk);
 
@@ -3597,19 +3590,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (flag & FLAG_UPDATE_TS_RECENT)
 		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
-	if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
-		/* Window is constant, pure forward advance.
-		 * No more checks are required.
-		 * Note, we use the fact that SND.UNA>=SND.WL2.
-		 */
-		tcp_update_wl(tp, ack_seq);
-		tcp_snd_una_update(tp, ack);
-		flag |= FLAG_WIN_UPDATE;
-
-		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
-
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
-	} else {
+	{
 		u32 ack_ev_flags = CA_ACK_SLOWPATH;
 
 		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
@@ -4398,8 +4379,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
@@ -4638,8 +4617,6 @@ queue_and_out:
 		if (tp->rx_opt.num_sacks)
 			tcp_sack_remove(tp);
 
-		tcp_fast_path_check(sk);
-
 		if (eaten > 0)
 			kfree_skb_partial(skb, fragstolen);
 		if (!sock_flag(sk, SOCK_DEAD))
@@ -4965,7 +4942,6 @@ static int tcp_prune_queue(struct sock *sk)
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
 
 	/* Massive buffer overcommit. */
-	tp->pred_flags = 0;
 	return -1;
 }
 
@@ -5137,9 +5113,6 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 
 	tp->urg_data = TCP_URG_NOTYET;
 	tp->urg_seq = ptr;
-
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
 }
 
 /* This is the 'fast' part of urgent handling. */
@@ -5298,26 +5271,6 @@ discard:
 
 /*
  *	TCP receive function for the ESTABLISHED state.
- *
- *	It is split into a fast path and a slow path. The fast path is
- * 	disabled when:
- *	- A zero window was announced from us - zero window probing
- *        is only handled properly in the slow path.
- *	- Out of order segments arrived.
- *	- Urgent data is expected.
- *	- There is no buffer space left
- *	- Unexpected TCP flags/window values/header lengths are received
- *	  (detected by checking the TCP header against pred_flags)
- *	- Data is sent in both directions. Fast path only supports pure senders
- *	  or pure receivers (this means either the sequence number or the ack
- *	  value must stay constant)
- *	- Unexpected TCP option.
- *
- *	When these conditions are not satisfied it drops into a standard
- *	receive procedure patterned after RFC793 to handle all cases.
- *	The first three cases are guaranteed by proper pred_flags setting,
- *	the rest is checked inline. Fast processing is turned on in
- *	tcp_data_queue when everything is OK.
  */
 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			 const struct tcphdr *th)
@@ -5328,144 +5281,19 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	tcp_mstamp_refresh(tp);
 	if (unlikely(!sk->sk_rx_dst))
 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
-	/*
-	 *	Header prediction.
-	 *	The code loosely follows the one in the famous
-	 *	"30 instruction TCP receive" Van Jacobson mail.
-	 *
-	 *	Van's trick is to deposit buffers into socket queue
-	 *	on a device interrupt, to call tcp_recv function
-	 *	on the receive process context and checksum and copy
-	 *	the buffer to user space. smart...
-	 *
-	 *	Our current scheme is not silly either but we take the
-	 *	extra cost of the net_bh soft interrupt processing...
-	 *	We do checksum and copy also but from device to kernel.
-	 */
 
 	tp->rx_opt.saw_tstamp = 0;
 
-	/*	pred_flags is 0xS?10 << 16 + snd_wnd
-	 *	if header_prediction is to be made
-	 *	'S' will always be tp->tcp_header_len >> 2
-	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
-	 *  turn it off	(when there are holes in the receive
-	 *	 space for instance)
-	 *	PSH flag is ignored.
-	 */
-
-	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
-	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
-	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
-		int tcp_header_len = tp->tcp_header_len;
-
-		/* Timestamp header prediction: tcp_header_len
-		 * is automatically equal to th->doff*4 due to pred_flags
-		 * match.
-		 */
-
-		/* Check timestamp */
-		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
-			/* No? Slow path! */
-			if (!tcp_parse_aligned_timestamp(tp, th))
-				goto slow_path;
-
-			/* If PAWS failed, check it more carefully in slow path */
-			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
-				goto slow_path;
-
-			/* DO NOT update ts_recent here, if checksum fails
-			 * and timestamp was corrupted part, it will result
-			 * in a hung connection since we will drop all
-			 * future packets due to the PAWS test.
-			 */
-		}
-
-		if (len <= tcp_header_len) {
-			/* Bulk data transfer: sender */
-			if (len == tcp_header_len) {
-				/* Predicted packet is in window by definition.
-				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-				 * Hence, check seq<=rcv_wup reduces to:
-				 */
-				if (tcp_header_len ==
-				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
-				    tp->rcv_nxt == tp->rcv_wup)
-					tcp_store_ts_recent(tp);
-
-				/* We know that such packets are checksummed
-				 * on entry.
-				 */
-				tcp_ack(sk, skb, 0);
-				__kfree_skb(skb);
-				tcp_data_snd_check(sk);
-				return;
-			} else { /* Header too small */
-				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-				goto discard;
-			}
-		} else {
-			int eaten = 0;
-			bool fragstolen = false;
-
-			if (tcp_checksum_complete(skb))
-				goto csum_error;
-
-			if ((int)skb->truesize > sk->sk_forward_alloc)
-				goto step5;
-
-			/* Predicted packet is in window by definition.
-			 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-			 * Hence, check seq<=rcv_wup reduces to:
-			 */
-			if (tcp_header_len ==
-			    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
-			    tp->rcv_nxt == tp->rcv_wup)
-				tcp_store_ts_recent(tp);
-
-			tcp_rcv_rtt_measure_ts(sk, skb);
-
-			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
-
-			/* Bulk data transfer: receiver */
-			eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
-					      &fragstolen);
-
-			tcp_event_data_recv(sk, skb);
-
-			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
-				/* Well, only one small jumplet in fast path... */
-				tcp_ack(sk, skb, FLAG_DATA);
-				tcp_data_snd_check(sk);
-				if (!inet_csk_ack_scheduled(sk))
-					goto no_ack;
-			}
-
-			__tcp_ack_snd_check(sk, 0);
-no_ack:
-			if (eaten)
-				kfree_skb_partial(skb, fragstolen);
-			sk->sk_data_ready(sk);
-			return;
-		}
-	}
-
-slow_path:
 	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
 		goto csum_error;
 
 	if (!th->ack && !th->rst && !th->syn)
 		goto discard;
 
-	/*
-	 *	Standard slow path.
-	 */
-
 	if (!tcp_validate_incoming(sk, skb, th, 1))
 		return;
 
-step5:
-	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
+	if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0)
 		goto discard;
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
@@ -5519,11 +5347,10 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	if (sock_flag(sk, SOCK_KEEPOPEN))
 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
 
-	if (!tp->rx_opt.snd_wscale)
-		__tcp_fast_path_on(tp, tp->snd_wnd);
-	else
-		tp->pred_flags = 0;
-
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+		sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+	}
 }
 
 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@ -5652,7 +5479,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_ecn_rcv_synack(tp, th);
 
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-		tcp_ack(sk, skb, FLAG_SLOWPATH);
+		tcp_ack(sk, skb, 0);
 
 		/* Ok.. it's good. Set up sequence numbers and
 		 * move to established.
@@ -5888,8 +5715,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		return 0;
 
 	/* step 5: check the ACK field */
-	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
-				      FLAG_UPDATE_TS_RECENT |
+
+	acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT |
 				      FLAG_NO_CHALLENGE_ACK) > 0;
 
 	if (!acceptable) {
@@ -5957,7 +5784,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		tp->lsndtime = tcp_jiffies32;
 
 		tcp_initialize_rcv_mss(sk);
-		tcp_fast_path_on(tp);
 		break;
 
 	case TCP_FIN_WAIT1: {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 188a6f31356d..1537b87c657f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -436,8 +436,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		struct tcp_sock *newtp = tcp_sk(newsk);
 
 		/* Now setup tcp_sock */
-		newtp->pred_flags = 0;
-
 		newtp->rcv_wup = newtp->copied_seq =
 		newtp->rcv_nxt = treq->rcv_isn + 1;
 		newtp->segs_in = 1;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 886d874775df..8380464aead1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -295,9 +295,7 @@ static u16 tcp_select_window(struct sock *sk)
 	/* RFC1323 scaling applied */
 	new_win >>= tp->rx_opt.rcv_wscale;
 
-	/* If we advertise zero window, disable fast path. */
 	if (new_win == 0) {
-		tp->pred_flags = 0;
 		if (old_win)
 			NET_INC_STATS(sock_net(sk),
 				      LINUX_MIB_TCPTOZEROWINDOWADV);
-- 
cgit v1.2.3


From 573aeb0492be3d0e5be9796a0c91abde794c1e36 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 30 Jul 2017 03:57:22 +0200
Subject: tcp: remove CA_ACK_SLOWPATH

re-indent tcp_ack, and remove CA_ACK_SLOWPATH; it is always set now.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h       |  5 ++---
 net/ipv4/tcp_input.c    | 35 ++++++++++++++++-------------------
 net/ipv4/tcp_westwood.c | 31 ++++---------------------------
 3 files changed, 22 insertions(+), 49 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8f11b82b5b5a..3ecb62811004 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -880,9 +880,8 @@ enum tcp_ca_event {
 
 /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
 enum tcp_ca_ack_event_flags {
-	CA_ACK_SLOWPATH		= (1 << 0),	/* In slow path processing */
-	CA_ACK_WIN_UPDATE	= (1 << 1),	/* ACK updated window */
-	CA_ACK_ECE		= (1 << 2),	/* ECE bit is set on ack */
+	CA_ACK_WIN_UPDATE	= (1 << 0),	/* ACK updated window */
+	CA_ACK_ECE		= (1 << 1),	/* ECE bit is set on ack */
 };
 
 /*
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bfde9d7d210e..af0a98d54b62 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3547,6 +3547,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 lost = tp->lost;
 	int acked = 0; /* Number of packets newly acked */
 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+	u32 ack_ev_flags = 0;
 
 	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
@@ -3590,30 +3591,26 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (flag & FLAG_UPDATE_TS_RECENT)
 		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
-	{
-		u32 ack_ev_flags = CA_ACK_SLOWPATH;
-
-		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
-			flag |= FLAG_DATA;
-		else
-			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
+	if (ack_seq != TCP_SKB_CB(skb)->end_seq)
+		flag |= FLAG_DATA;
+	else
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
 
-		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
+	flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
-		if (TCP_SKB_CB(skb)->sacked)
-			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-							&sack_state);
+	if (TCP_SKB_CB(skb)->sacked)
+		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+						&sack_state);
 
-		if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
-			flag |= FLAG_ECE;
-			ack_ev_flags |= CA_ACK_ECE;
-		}
+	if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
+		flag |= FLAG_ECE;
+		ack_ev_flags = CA_ACK_ECE;
+	}
 
-		if (flag & FLAG_WIN_UPDATE)
-			ack_ev_flags |= CA_ACK_WIN_UPDATE;
+	if (flag & FLAG_WIN_UPDATE)
+		ack_ev_flags |= CA_ACK_WIN_UPDATE;
 
-		tcp_in_ack_event(sk, ack_ev_flags);
-	}
+	tcp_in_ack_event(sk, ack_ev_flags);
 
 	/* We passed data and got it acked, remove any soft error
 	 * log. Something worked...
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index bec9cafbe3f9..e5de84310949 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -153,24 +153,6 @@ static inline void update_rtt_min(struct westwood *w)
 		w->rtt_min = min(w->rtt, w->rtt_min);
 }
 
-/*
- * @westwood_fast_bw
- * It is called when we are in fast path. In particular it is called when
- * header prediction is successful. In such case in fact update is
- * straight forward and doesn't need any particular care.
- */
-static inline void westwood_fast_bw(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct westwood *w = inet_csk_ca(sk);
-
-	westwood_update_window(sk);
-
-	w->bk += tp->snd_una - w->snd_una;
-	w->snd_una = tp->snd_una;
-	update_rtt_min(w);
-}
-
 /*
  * @westwood_acked_count
  * This function evaluates cumul_ack for evaluating bk in case of
@@ -223,17 +205,12 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk)
 
 static void tcp_westwood_ack(struct sock *sk, u32 ack_flags)
 {
-	if (ack_flags & CA_ACK_SLOWPATH) {
-		struct westwood *w = inet_csk_ca(sk);
-
-		westwood_update_window(sk);
-		w->bk += westwood_acked_count(sk);
+	struct westwood *w = inet_csk_ca(sk);
 
-		update_rtt_min(w);
-		return;
-	}
+	westwood_update_window(sk);
+	w->bk += westwood_acked_count(sk);
 
-	westwood_fast_bw(sk);
+	update_rtt_min(w);
 }
 
 static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
-- 
cgit v1.2.3


From 306b13eb3cf9515a8214bbf5d69d811371d05792 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@quantonium.net>
Date: Fri, 28 Jul 2017 16:22:41 -0700
Subject: proto_ops: Add locked held versions of sendmsg and sendpage

Add new proto_ops sendmsg_locked and sendpage_locked that can be
called when the socket lock is already held. Correspondingly, add
kernel_sendmsg_locked and kernel_sendpage_locked as front end
functions.

These functions will be used in zero proxy so that we can take
the socket lock in a ULP sendmsg/sendpage and then directly call the
backend transport proto_ops functions.

Signed-off-by: Tom Herbert <tom@quantonium.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 12 ++++++++++++
 include/net/sock.h  |  3 +++
 include/net/tcp.h   |  3 +++
 net/core/sock.c     | 22 ++++++++++++++++++++++
 net/ipv4/af_inet.c  |  2 ++
 net/ipv4/tcp.c      | 39 ++++++++++++++++++++++++++-------------
 net/socket.c        | 27 +++++++++++++++++++++++++++
 7 files changed, 95 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/net.h b/include/linux/net.h
index dda2cc939a53..b5c15b31709b 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -190,8 +190,16 @@ struct proto_ops {
 				       struct pipe_inode_info *pipe, size_t len, unsigned int flags);
 	int		(*set_peek_off)(struct sock *sk, int val);
 	int		(*peek_len)(struct socket *sock);
+
+	/* The following functions are called internally by kernel with
+	 * sock lock already held.
+	 */
 	int		(*read_sock)(struct sock *sk, read_descriptor_t *desc,
 				     sk_read_actor_t recv_actor);
+	int		(*sendpage_locked)(struct sock *sk, struct page *page,
+					   int offset, size_t size, int flags);
+	int		(*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
+					  size_t size);
 };
 
 #define DECLARE_SOCKADDR(type, dst, src)	\
@@ -279,6 +287,8 @@ do {									\
 
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
 		   size_t num, size_t len);
+int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+			  struct kvec *vec, size_t num, size_t len);
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
 		   size_t num, size_t len, int flags);
 
@@ -297,6 +307,8 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
 		      unsigned int optlen);
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags);
+int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			   size_t size, int flags);
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 7c0632c7e870..393c38e9f6aa 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1582,11 +1582,14 @@ int sock_no_shutdown(struct socket *, int);
 int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *);
 int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int);
 int sock_no_sendmsg(struct socket *, struct msghdr *, size_t);
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
 int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
 int sock_no_mmap(struct file *file, struct socket *sock,
 		 struct vm_area_struct *vma);
 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
 			 size_t size, int flags);
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+				int offset, size_t size, int flags);
 
 /*
  * Functions to fill in entries in struct proto_ops when a protocol
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3ecb62811004..bb1881b4ce48 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -350,8 +350,11 @@ int tcp_v4_rcv(struct sk_buff *skb);
 
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
 int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
 int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
 		 int flags);
+int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			size_t size, int flags);
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 		 size_t size, int flags);
 void tcp_release_cb(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index ac2a404c73eb..742f68c9c84a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2500,6 +2500,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 }
 EXPORT_SYMBOL(sock_no_sendmsg);
 
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_sendmsg_locked);
+
 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
 		    int flags)
 {
@@ -2528,6 +2534,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
 }
 EXPORT_SYMBOL(sock_no_sendpage);
 
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+				int offset, size_t size, int flags)
+{
+	ssize_t res;
+	struct msghdr msg = {.msg_flags = flags};
+	struct kvec iov;
+	char *kaddr = kmap(page);
+
+	iov.iov_base = kaddr + offset;
+	iov.iov_len = size;
+	res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
+	kunmap(page);
+	return res;
+}
+EXPORT_SYMBOL(sock_no_sendpage_locked);
+
 /*
  *	Default Socket Callbacks
  */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5ce44fb7d498..f0103ffe1cdb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -944,6 +944,8 @@ const struct proto_ops inet_stream_ops = {
 	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 	.read_sock	   = tcp_read_sock,
+	.sendmsg_locked    = tcp_sendmsg_locked,
+	.sendpage_locked   = tcp_sendpage_locked,
 	.peek_len	   = tcp_peek_len,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5326b50a3450..9dd6f4dba9b1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1046,23 +1046,29 @@ out_err:
 }
 EXPORT_SYMBOL_GPL(do_tcp_sendpages);
 
-int tcp_sendpage(struct sock *sk, struct page *page, int offset,
-		 size_t size, int flags)
+int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			size_t size, int flags)
 {
-	ssize_t res;
-
 	if (!(sk->sk_route_caps & NETIF_F_SG) ||
 	    !sk_check_csum_caps(sk))
 		return sock_no_sendpage(sk->sk_socket, page, offset, size,
 					flags);
 
-	lock_sock(sk);
-
 	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
 
-	res = do_tcp_sendpages(sk, page, offset, size, flags);
+	return do_tcp_sendpages(sk, page, offset, size, flags);
+}
+
+int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+		 size_t size, int flags)
+{
+	int ret;
+
+	lock_sock(sk);
+	ret = tcp_sendpage_locked(sk, page, offset, size, flags);
 	release_sock(sk);
-	return res;
+
+	return ret;
 }
 EXPORT_SYMBOL(tcp_sendpage);
 
@@ -1156,7 +1162,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 	return err;
 }
 
-int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1167,8 +1173,6 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	bool sg;
 	long timeo;
 
-	lock_sock(sk);
-
 	flags = msg->msg_flags;
 	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
 		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
@@ -1377,7 +1381,6 @@ out:
 		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
 out_nopush:
-	release_sock(sk);
 	return copied + copied_syn;
 
 do_fault:
@@ -1401,9 +1404,19 @@ out_err:
 		sk->sk_write_space(sk);
 		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
 	}
-	release_sock(sk);
 	return err;
 }
+
+int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	int ret;
+
+	lock_sock(sk);
+	ret = tcp_sendmsg_locked(sk, msg, size);
+	release_sock(sk);
+
+	return ret;
+}
 EXPORT_SYMBOL(tcp_sendmsg);
 
 /*
diff --git a/net/socket.c b/net/socket.c
index cb0fdf799f40..b332d1e8e4e4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -652,6 +652,20 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(kernel_sendmsg);
 
+int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+			  struct kvec *vec, size_t num, size_t size)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (!sock->ops->sendmsg_locked)
+		sock_no_sendmsg_locked(sk, msg, size);
+
+	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+
+	return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
+}
+EXPORT_SYMBOL(kernel_sendmsg_locked);
+
 static bool skb_is_err_queue(const struct sk_buff *skb)
 {
 	/* pkt_type of skbs enqueued on the error queue are set to
@@ -3376,6 +3390,19 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 }
 EXPORT_SYMBOL(kernel_sendpage);
 
+int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			   size_t size, int flags)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sock->ops->sendpage_locked)
+		return sock->ops->sendpage_locked(sk, page, offset, size,
+						  flags);
+
+	return sock_no_sendpage_locked(sk, page, offset, size, flags);
+}
+EXPORT_SYMBOL(kernel_sendpage_locked);
+
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
 {
 	mm_segment_t oldfs = get_fs();
-- 
cgit v1.2.3


From bbb03029a899679d73e62d7e6ae80348cc5d0054 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@quantonium.net>
Date: Fri, 28 Jul 2017 16:22:43 -0700
Subject: strparser: Generalize strparser

Generalize strparser from more than just being used in conjunction
with read_sock. strparser will also be used in the send path with
zero proxy. The primary change is to create strp_process function
that performs the critical processing on skbs. The documentation
is also updated to reflect the new uses.

Signed-off-by: Tom Herbert <tom@quantonium.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/strparser.txt | 207 +++++++++++++++-------
 include/net/strparser.h                | 119 +++++++------
 net/kcm/kcmproc.c                      |  34 ++--
 net/kcm/kcmsock.c                      |  38 ++--
 net/strparser/strparser.c              | 313 ++++++++++++++++++++-------------
 5 files changed, 424 insertions(+), 287 deletions(-)

(limited to 'include/net')

diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt
index a0bf573dfa61..fe01302471ae 100644
--- a/Documentation/networking/strparser.txt
+++ b/Documentation/networking/strparser.txt
@@ -1,45 +1,107 @@
-Stream Parser
--------------
+Stream Parser (strparser)
+
+Introduction
+============
 
 The stream parser (strparser) is a utility that parses messages of an
-application layer protocol running over a TCP connection. The stream
+application layer protocol running over a data stream. The stream
 parser works in conjunction with an upper layer in the kernel to provide
 kernel support for application layer messages. For instance, Kernel
 Connection Multiplexor (KCM) uses the Stream Parser to parse messages
 using a BPF program.
 
+The strparser works in one of two modes: receive callback or general
+mode.
+
+In receive callback mode, the strparser is called from the data_ready
+callback of a TCP socket. Messages are parsed and delivered as they are
+received on the socket.
+
+In general mode, a sequence of skbs are fed to strparser from an
+outside source. Message are parsed and delivered as the sequence is
+processed. This modes allows strparser to be applied to arbitrary
+streams of data.
+
 Interface
----------
+=========
 
 The API includes a context structure, a set of callbacks, utility
-functions, and a data_ready function. The callbacks include
-a parse_msg function that is called to perform parsing (e.g.
-BPF parsing in case of KCM), and a rcv_msg function that is called
-when a full message has been completed.
+functions, and a data_ready function for receive callback mode. The
+callbacks include a parse_msg function that is called to perform
+parsing (e.g.  BPF parsing in case of KCM), and a rcv_msg function
+that is called when a full message has been completed.
 
-A stream parser can be instantiated for a TCP connection. This is done
-by:
+Functions
+=========
 
-strp_init(struct strparser *strp, struct sock *csk,
+strp_init(struct strparser *strp, struct sock *sk,
 	  struct strp_callbacks *cb)
 
-strp is a struct of type strparser that is allocated by the upper layer.
-csk is the TCP socket associated with the stream parser. Callbacks are
-called by the stream parser.
+     Called to initialize a stream parser. strp is a struct of type
+     strparser that is allocated by the upper layer. sk is the TCP
+     socket associated with the stream parser for use with receive
+     callback mode; in general mode this is set to NULL. Callbacks
+     are called by the stream parser (the callbacks are listed below).
+
+void strp_pause(struct strparser *strp)
+
+     Temporarily pause a stream parser. Message parsing is suspended
+     and no new messages are delivered to the upper layer.
+
+void strp_pause(struct strparser *strp)
+
+     Unpause a paused stream parser.
+
+void strp_stop(struct strparser *strp);
+
+     strp_stop is called to completely stop stream parser operations.
+     This is called internally when the stream parser encounters an
+     error, and it is called from the upper layer to stop parsing
+     operations.
+
+void strp_done(struct strparser *strp);
+
+     strp_done is called to release any resources held by the stream
+     parser instance. This must be called after the stream processor
+     has been stopped.
+
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+		 unsigned int orig_offset, size_t orig_len,
+		 size_t max_msg_size, long timeo)
+
+    strp_process is called in general mode for a stream parser to
+    parse an sk_buff. The number of bytes processed or a negative
+    error number is returned. Note that strp_process does not
+    consume the sk_buff. max_msg_size is maximum size the stream
+    parser will parse. timeo is timeout for completing a message.
+
+void strp_data_ready(struct strparser *strp);
+
+    The upper layer calls strp_tcp_data_ready when data is ready on
+    the lower socket for strparser to process. This should be called
+    from a data_ready callback that is set on the socket. Note that
+    maximum messages size is the limit of the receive socket
+    buffer and message timeout is the receive timeout for the socket.
+
+void strp_check_rcv(struct strparser *strp);
+
+    strp_check_rcv is called to check for new messages on the socket.
+    This is normally called at initialization of a stream parser
+    instance or after strp_unpause.
 
 Callbacks
----------
+=========
 
-There are four callbacks:
+There are six callbacks:
 
 int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
 
     parse_msg is called to determine the length of the next message
     in the stream. The upper layer must implement this function. It
     should parse the sk_buff as containing the headers for the
-    next application layer messages in the stream.
+    next application layer message in the stream.
 
-    The skb->cb in the input skb is a struct strp_rx_msg. Only
+    The skb->cb in the input skb is a struct strp_msg. Only
     the offset field is relevant in parse_msg and gives the offset
     where the message starts in the skb.
 
@@ -50,26 +112,41 @@ int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
     -ESTRPIPE : current message should not be processed by the
           kernel, return control of the socket to userspace which
           can proceed to read the messages itself
-    other < 0 : Error is parsing, give control back to userspace
+    other < 0 : Error in parsing, give control back to userspace
           assuming that synchronization is lost and the stream
           is unrecoverable (application expected to close TCP socket)
 
     In the case that an error is returned (return value is less than
-    zero) the stream parser will set the error on TCP socket and wake
-    it up. If parse_msg returned -ESTRPIPE and the stream parser had
-    previously read some bytes for the current message, then the error
-    set on the attached socket is ENODATA since the stream is
-    unrecoverable in that case.
+    zero) and the parser is in receive callback mode, then it will set
+    the error on TCP socket and wake it up. If parse_msg returned
+    -ESTRPIPE and the stream parser had previously read some bytes for
+    the current message, then the error set on the attached socket is
+    ENODATA since the stream is unrecoverable in that case.
+
+void (*lock)(struct strparser *strp)
+
+    The lock callback is called to lock the strp structure when
+    the strparser is performing an asynchronous operation (such as
+    processing a timeout). In receive callback mode the default
+    function is to lock_sock for the associated socket. In general
+    mode the callback must be set appropriately.
+
+void (*unlock)(struct strparser *strp)
+
+    The unlock callback is called to release the lock obtained
+    by the lock callback. In receive callback mode the default
+    function is release_sock for the associated socket. In general
+    mode the callback must be set appropriately.
 
 void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
 
     rcv_msg is called when a full message has been received and
     is queued. The callee must consume the sk_buff; it can
     call strp_pause to prevent any further messages from being
-    received in rcv_msg (see strp_pause below). This callback
+    received in rcv_msg (see strp_pause above). This callback
     must be set.
 
-    The skb->cb in the input skb is a struct strp_rx_msg. This
+    The skb->cb in the input skb is a struct strp_msg. This
     struct contains two fields: offset and full_len. Offset is
     where the message starts in the skb, and full_len is the
     the length of the message. skb->len - offset may be greater
@@ -78,59 +155,53 @@ void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
 int (*read_sock_done)(struct strparser *strp, int err);
 
      read_sock_done is called when the stream parser is done reading
-     the TCP socket. The stream parser may read multiple messages
-     in a loop and this function allows cleanup to occur when existing
-     the loop. If the callback is not set (NULL in strp_init) a
-     default function is used.
+     the TCP socket in receive callback mode. The stream parser may
+     read multiple messages in a loop and this function allows cleanup
+     to occur when exiting the loop. If the callback is not set (NULL
+     in strp_init) a default function is used.
 
 void (*abort_parser)(struct strparser *strp, int err);
 
      This function is called when stream parser encounters an error
-     in parsing. The default function stops the stream parser for the
-     TCP socket and sets the error in the socket. The default function
-     can be changed by setting the callback to non-NULL in strp_init.
+     in parsing. The default function stops the stream parser and
+     sets the error in the socket if the parser is in receive callback
+     mode. The default function can be changed by setting the callback
+     to non-NULL in strp_init.
 
-Functions
----------
+Statistics
+==========
 
-The upper layer calls strp_tcp_data_ready when data is ready on the lower
-socket for strparser to process. This should be called from a data_ready
-callback that is set on the socket.
+Various counters are kept for each stream parser instance. These are in
+the strp_stats structure. strp_aggr_stats is a convenience structure for
+accumulating statistics for multiple stream parser instances.
+save_strp_stats and aggregate_strp_stats are helper functions to save
+and aggregate statistics.
 
-strp_stop is called to completely stop stream parser operations. This
-is called internally when the stream parser encounters an error, and
-it is called from the upper layer when unattaching a TCP socket.
+Message assembly limits
+=======================
 
-strp_done is called to unattach the stream parser from the TCP socket.
-This must be called after the stream processor has be stopped.
+The stream parser provide mechanisms to limit the resources consumed by
+message assembly.
 
-strp_check_rcv is called to check for new messages on the socket. This
-is normally called at initialization of the a stream parser instance
-of after strp_unpause.
+A timer is set when assembly starts for a new message. In receive
+callback mode the message timeout is taken from rcvtime for the
+associated TCP socket. In general mode, the timeout is passed as an
+argument in strp_process. If the timer fires before assembly completes
+the stream parser is aborted and the ETIMEDOUT error is set on the TCP
+socket if in receive callback mode.
 
-Statistics
-----------
+In receive callback mode, message length is limited to the receive
+buffer size of the associated TCP socket. If the length returned by
+parse_msg is greater than the socket buffer size then the stream parser
+is aborted with EMSGSIZE error set on the TCP socket. Note that this
+makes the maximum size of receive skbuffs for a socket with a stream
+parser to be 2*sk_rcvbuf of the TCP socket.
 
-Various counters are kept for each stream parser for a TCP socket.
-These are in the strp_stats structure. strp_aggr_stats is a convenience
-structure for accumulating statistics for multiple stream parser
-instances. save_strp_stats and aggregate_strp_stats are helper functions
-to save and aggregate statistics.
+In general mode the message length limit is passed in as an argument
+to strp_process.
 
-Message assembly limits
------------------------
+Author
+======
 
-The stream parser provide mechanisms to limit the resources consumed by
-message assembly.
+Tom Herbert (tom@quantonium.net)
 
-A timer is set when assembly starts for a new message. The message
-timeout is taken from rcvtime for the associated TCP socket. If the
-timer fires before assembly completes the stream parser is aborted
-and the ETIMEDOUT error is set on the TCP socket.
-
-Message length is limited to the receive buffer size of the associated
-TCP socket. If the length returned by parse_msg is greater than
-the socket buffer size then the stream parser is aborted with
-EMSGSIZE error set on the TCP socket. Note that this makes the
-maximum size of receive skbuffs for a socket with a stream parser
-to be 2*sk_rcvbuf of the TCP socket.
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 0c28ad97c52f..4fe966a0ad92 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -18,26 +18,26 @@
 #define STRP_STATS_INCR(stat) ((stat)++)
 
 struct strp_stats {
-	unsigned long long rx_msgs;
-	unsigned long long rx_bytes;
-	unsigned int rx_mem_fail;
-	unsigned int rx_need_more_hdr;
-	unsigned int rx_msg_too_big;
-	unsigned int rx_msg_timeouts;
-	unsigned int rx_bad_hdr_len;
+	unsigned long long msgs;
+	unsigned long long bytes;
+	unsigned int mem_fail;
+	unsigned int need_more_hdr;
+	unsigned int msg_too_big;
+	unsigned int msg_timeouts;
+	unsigned int bad_hdr_len;
 };
 
 struct strp_aggr_stats {
-	unsigned long long rx_msgs;
-	unsigned long long rx_bytes;
-	unsigned int rx_mem_fail;
-	unsigned int rx_need_more_hdr;
-	unsigned int rx_msg_too_big;
-	unsigned int rx_msg_timeouts;
-	unsigned int rx_bad_hdr_len;
-	unsigned int rx_aborts;
-	unsigned int rx_interrupted;
-	unsigned int rx_unrecov_intr;
+	unsigned long long msgs;
+	unsigned long long bytes;
+	unsigned int mem_fail;
+	unsigned int need_more_hdr;
+	unsigned int msg_too_big;
+	unsigned int msg_timeouts;
+	unsigned int bad_hdr_len;
+	unsigned int aborts;
+	unsigned int interrupted;
+	unsigned int unrecov_intr;
 };
 
 struct strparser;
@@ -48,16 +48,18 @@ struct strp_callbacks {
 	void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
 	int (*read_sock_done)(struct strparser *strp, int err);
 	void (*abort_parser)(struct strparser *strp, int err);
+	void (*lock)(struct strparser *strp);
+	void (*unlock)(struct strparser *strp);
 };
 
-struct strp_rx_msg {
+struct strp_msg {
 	int full_len;
 	int offset;
 };
 
-static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb)
+static inline struct strp_msg *strp_msg(struct sk_buff *skb)
 {
-	return (struct strp_rx_msg *)((void *)skb->cb +
+	return (struct strp_msg *)((void *)skb->cb +
 		offsetof(struct qdisc_skb_cb, data));
 }
 
@@ -65,18 +67,18 @@ static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb)
 struct strparser {
 	struct sock *sk;
 
-	u32 rx_stopped : 1;
-	u32 rx_paused : 1;
-	u32 rx_aborted : 1;
-	u32 rx_interrupted : 1;
-	u32 rx_unrecov_intr : 1;
-
-	struct sk_buff **rx_skb_nextp;
-	struct timer_list rx_msg_timer;
-	struct sk_buff *rx_skb_head;
-	unsigned int rx_need_bytes;
-	struct delayed_work rx_delayed_work;
-	struct work_struct rx_work;
+	u32 stopped : 1;
+	u32 paused : 1;
+	u32 aborted : 1;
+	u32 interrupted : 1;
+	u32 unrecov_intr : 1;
+
+	struct sk_buff **skb_nextp;
+	struct timer_list msg_timer;
+	struct sk_buff *skb_head;
+	unsigned int need_bytes;
+	struct delayed_work delayed_work;
+	struct work_struct work;
 	struct strp_stats stats;
 	struct strp_callbacks cb;
 };
@@ -84,7 +86,7 @@ struct strparser {
 /* Must be called with lock held for attached socket */
 static inline void strp_pause(struct strparser *strp)
 {
-	strp->rx_paused = 1;
+	strp->paused = 1;
 }
 
 /* May be called without holding lock for attached socket */
@@ -97,37 +99,37 @@ static inline void save_strp_stats(struct strparser *strp,
 
 #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat +=		\
 				 strp->stats._stat)
-	SAVE_PSOCK_STATS(rx_msgs);
-	SAVE_PSOCK_STATS(rx_bytes);
-	SAVE_PSOCK_STATS(rx_mem_fail);
-	SAVE_PSOCK_STATS(rx_need_more_hdr);
-	SAVE_PSOCK_STATS(rx_msg_too_big);
-	SAVE_PSOCK_STATS(rx_msg_timeouts);
-	SAVE_PSOCK_STATS(rx_bad_hdr_len);
+	SAVE_PSOCK_STATS(msgs);
+	SAVE_PSOCK_STATS(bytes);
+	SAVE_PSOCK_STATS(mem_fail);
+	SAVE_PSOCK_STATS(need_more_hdr);
+	SAVE_PSOCK_STATS(msg_too_big);
+	SAVE_PSOCK_STATS(msg_timeouts);
+	SAVE_PSOCK_STATS(bad_hdr_len);
 #undef SAVE_PSOCK_STATS
 
-	if (strp->rx_aborted)
-		agg_stats->rx_aborts++;
-	if (strp->rx_interrupted)
-		agg_stats->rx_interrupted++;
-	if (strp->rx_unrecov_intr)
-		agg_stats->rx_unrecov_intr++;
+	if (strp->aborted)
+		agg_stats->aborts++;
+	if (strp->interrupted)
+		agg_stats->interrupted++;
+	if (strp->unrecov_intr)
+		agg_stats->unrecov_intr++;
 }
 
 static inline void aggregate_strp_stats(struct strp_aggr_stats *stats,
 					struct strp_aggr_stats *agg_stats)
 {
 #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
-	SAVE_PSOCK_STATS(rx_msgs);
-	SAVE_PSOCK_STATS(rx_bytes);
-	SAVE_PSOCK_STATS(rx_mem_fail);
-	SAVE_PSOCK_STATS(rx_need_more_hdr);
-	SAVE_PSOCK_STATS(rx_msg_too_big);
-	SAVE_PSOCK_STATS(rx_msg_timeouts);
-	SAVE_PSOCK_STATS(rx_bad_hdr_len);
-	SAVE_PSOCK_STATS(rx_aborts);
-	SAVE_PSOCK_STATS(rx_interrupted);
-	SAVE_PSOCK_STATS(rx_unrecov_intr);
+	SAVE_PSOCK_STATS(msgs);
+	SAVE_PSOCK_STATS(bytes);
+	SAVE_PSOCK_STATS(mem_fail);
+	SAVE_PSOCK_STATS(need_more_hdr);
+	SAVE_PSOCK_STATS(msg_too_big);
+	SAVE_PSOCK_STATS(msg_timeouts);
+	SAVE_PSOCK_STATS(bad_hdr_len);
+	SAVE_PSOCK_STATS(aborts);
+	SAVE_PSOCK_STATS(interrupted);
+	SAVE_PSOCK_STATS(unrecov_intr);
 #undef SAVE_PSOCK_STATS
 
 }
@@ -135,8 +137,11 @@ static inline void aggregate_strp_stats(struct strp_aggr_stats *stats,
 void strp_done(struct strparser *strp);
 void strp_stop(struct strparser *strp);
 void strp_check_rcv(struct strparser *strp);
-int strp_init(struct strparser *strp, struct sock *csk,
+int strp_init(struct strparser *strp, struct sock *sk,
 	      struct strp_callbacks *cb);
 void strp_data_ready(struct strparser *strp);
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+		 unsigned int orig_offset, size_t orig_len,
+		 size_t max_msg_size, long timeo);
 
 #endif /* __NET_STRPARSER_H_ */
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index c343ac60bf50..c748e8a6a72c 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
 	seq_printf(seq,
 		   "   psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
 		   psock->index,
-		   psock->strp.stats.rx_msgs,
-		   psock->strp.stats.rx_bytes,
+		   psock->strp.stats.msgs,
+		   psock->strp.stats.bytes,
 		   psock->stats.tx_msgs,
 		   psock->stats.tx_bytes,
 		   psock->sk->sk_receive_queue.qlen,
@@ -170,22 +170,22 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
 	if (psock->tx_stopped)
 		seq_puts(seq, "TxStop ");
 
-	if (psock->strp.rx_stopped)
+	if (psock->strp.stopped)
 		seq_puts(seq, "RxStop ");
 
 	if (psock->tx_kcm)
 		seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
 
-	if (!psock->strp.rx_paused && !psock->ready_rx_msg) {
+	if (!psock->strp.paused && !psock->ready_rx_msg) {
 		if (psock->sk->sk_receive_queue.qlen) {
-			if (psock->strp.rx_need_bytes)
+			if (psock->strp.need_bytes)
 				seq_printf(seq, "RxWait=%u ",
-					   psock->strp.rx_need_bytes);
+					   psock->strp.need_bytes);
 			else
 				seq_printf(seq, "RxWait ");
 		}
 	} else  {
-		if (psock->strp.rx_paused)
+		if (psock->strp.paused)
 			seq_puts(seq, "RxPause ");
 
 		if (psock->ready_rx_msg)
@@ -371,20 +371,20 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
 		   "",
-		   strp_stats.rx_msgs,
-		   strp_stats.rx_bytes,
+		   strp_stats.msgs,
+		   strp_stats.bytes,
 		   psock_stats.tx_msgs,
 		   psock_stats.tx_bytes,
 		   psock_stats.reserved,
 		   psock_stats.unreserved,
-		   strp_stats.rx_aborts,
-		   strp_stats.rx_interrupted,
-		   strp_stats.rx_unrecov_intr,
-		   strp_stats.rx_mem_fail,
-		   strp_stats.rx_need_more_hdr,
-		   strp_stats.rx_bad_hdr_len,
-		   strp_stats.rx_msg_too_big,
-		   strp_stats.rx_msg_timeouts,
+		   strp_stats.aborts,
+		   strp_stats.interrupted,
+		   strp_stats.unrecov_intr,
+		   strp_stats.mem_fail,
+		   strp_stats.need_more_hdr,
+		   strp_stats.bad_hdr_len,
+		   strp_stats.msg_too_big,
+		   strp_stats.msg_timeouts,
 		   psock_stats.tx_aborts);
 
 	return 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index da49191f7ad0..88ce73288247 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -96,12 +96,12 @@ static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
 				    struct kcm_psock *psock)
 {
 	STRP_STATS_ADD(mux->stats.rx_bytes,
-		       psock->strp.stats.rx_bytes -
+		       psock->strp.stats.bytes -
 		       psock->saved_rx_bytes);
 	mux->stats.rx_msgs +=
-		psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
-	psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
-	psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
+		psock->strp.stats.msgs - psock->saved_rx_msgs;
+	psock->saved_rx_msgs = psock->strp.stats.msgs;
+	psock->saved_rx_bytes = psock->strp.stats.bytes;
 }
 
 static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@ -1118,7 +1118,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct kcm_sock *kcm = kcm_sk(sk);
 	int err = 0;
 	long timeo;
-	struct strp_rx_msg *rxm;
+	struct strp_msg *stm;
 	int copied = 0;
 	struct sk_buff *skb;
 
@@ -1132,26 +1132,26 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = strp_rx_msg(skb);
+	stm = strp_msg(skb);
 
-	if (len > rxm->full_len)
-		len = rxm->full_len;
+	if (len > stm->full_len)
+		len = stm->full_len;
 
-	err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
+	err = skb_copy_datagram_msg(skb, stm->offset, msg, len);
 	if (err < 0)
 		goto out;
 
 	copied = len;
 	if (likely(!(flags & MSG_PEEK))) {
 		KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
-		if (copied < rxm->full_len) {
+		if (copied < stm->full_len) {
 			if (sock->type == SOCK_DGRAM) {
 				/* Truncated message */
 				msg->msg_flags |= MSG_TRUNC;
 				goto msg_finished;
 			}
-			rxm->offset += copied;
-			rxm->full_len -= copied;
+			stm->offset += copied;
+			stm->full_len -= copied;
 		} else {
 msg_finished:
 			/* Finished with message */
@@ -1175,7 +1175,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 	struct sock *sk = sock->sk;
 	struct kcm_sock *kcm = kcm_sk(sk);
 	long timeo;
-	struct strp_rx_msg *rxm;
+	struct strp_msg *stm;
 	int err = 0;
 	ssize_t copied;
 	struct sk_buff *skb;
@@ -1192,12 +1192,12 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = strp_rx_msg(skb);
+	stm = strp_msg(skb);
 
-	if (len > rxm->full_len)
-		len = rxm->full_len;
+	if (len > stm->full_len)
+		len = stm->full_len;
 
-	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
+	copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags);
 	if (copied < 0) {
 		err = copied;
 		goto err_out;
@@ -1205,8 +1205,8 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 
 	KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
 
-	rxm->offset += copied;
-	rxm->full_len -= copied;
+	stm->offset += copied;
+	stm->full_len -= copied;
 
 	/* We have no way to return MSG_EOR. If all the bytes have been
 	 * read we still leave the message in the receive socket buffer.
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index b5c279b22680..0d18fbc6f870 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -29,44 +29,46 @@
 
 static struct workqueue_struct *strp_wq;
 
-struct _strp_rx_msg {
-	/* Internal cb structure. struct strp_rx_msg must be first for passing
+struct _strp_msg {
+	/* Internal cb structure. struct strp_msg must be first for passing
 	 * to upper layer.
 	 */
-	struct strp_rx_msg strp;
+	struct strp_msg strp;
 	int accum_len;
 	int early_eaten;
 };
 
-static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
 {
-	return (struct _strp_rx_msg *)((void *)skb->cb +
+	return (struct _strp_msg *)((void *)skb->cb +
 		offsetof(struct qdisc_skb_cb, data));
 }
 
 /* Lower lock held */
-static void strp_abort_rx_strp(struct strparser *strp, int err)
+static void strp_abort_strp(struct strparser *strp, int err)
 {
-	struct sock *csk = strp->sk;
-
 	/* Unrecoverable error in receive */
 
-	del_timer(&strp->rx_msg_timer);
+	del_timer(&strp->msg_timer);
 
-	if (strp->rx_stopped)
+	if (strp->stopped)
 		return;
 
-	strp->rx_stopped = 1;
+	strp->stopped = 1;
+
+	if (strp->sk) {
+		struct sock *sk = strp->sk;
 
-	/* Report an error on the lower socket */
-	csk->sk_err = err;
-	csk->sk_error_report(csk);
+		/* Report an error on the lower socket */
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	}
 }
 
-static void strp_start_rx_timer(struct strparser *strp)
+static void strp_start_timer(struct strparser *strp, long timeo)
 {
-	if (strp->sk->sk_rcvtimeo)
-		mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+	if (timeo)
+		mod_timer(&strp->msg_timer, timeo);
 }
 
 /* Lower lock held */
@@ -74,46 +76,55 @@ static void strp_parser_err(struct strparser *strp, int err,
 			    read_descriptor_t *desc)
 {
 	desc->error = err;
-	kfree_skb(strp->rx_skb_head);
-	strp->rx_skb_head = NULL;
+	kfree_skb(strp->skb_head);
+	strp->skb_head = NULL;
 	strp->cb.abort_parser(strp, err);
 }
 
 static inline int strp_peek_len(struct strparser *strp)
 {
-	struct socket *sock = strp->sk->sk_socket;
+	if (strp->sk) {
+		struct socket *sock = strp->sk->sk_socket;
+
+		return sock->ops->peek_len(sock);
+	}
+
+	/* If we don't have an associated socket there's nothing to peek.
+	 * Return int max to avoid stopping the strparser.
+	 */
 
-	return sock->ops->peek_len(sock);
+	return INT_MAX;
 }
 
 /* Lower socket lock held */
-static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
-		     unsigned int orig_offset, size_t orig_len)
+static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+		       unsigned int orig_offset, size_t orig_len,
+		       size_t max_msg_size, long timeo)
 {
 	struct strparser *strp = (struct strparser *)desc->arg.data;
-	struct _strp_rx_msg *rxm;
+	struct _strp_msg *stm;
 	struct sk_buff *head, *skb;
 	size_t eaten = 0, cand_len;
 	ssize_t extra;
 	int err;
 	bool cloned_orig = false;
 
-	if (strp->rx_paused)
+	if (strp->paused)
 		return 0;
 
-	head = strp->rx_skb_head;
+	head = strp->skb_head;
 	if (head) {
 		/* Message already in progress */
 
-		rxm = _strp_rx_msg(head);
-		if (unlikely(rxm->early_eaten)) {
+		stm = _strp_msg(head);
+		if (unlikely(stm->early_eaten)) {
 			/* Already some number of bytes on the receive sock
-			 * data saved in rx_skb_head, just indicate they
+			 * data saved in skb_head, just indicate they
 			 * are consumed.
 			 */
-			eaten = orig_len <= rxm->early_eaten ?
-				orig_len : rxm->early_eaten;
-			rxm->early_eaten -= eaten;
+			eaten = orig_len <= stm->early_eaten ?
+				orig_len : stm->early_eaten;
+			stm->early_eaten -= eaten;
 
 			return eaten;
 		}
@@ -126,12 +137,12 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 			 */
 			orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
 			if (!orig_skb) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				desc->error = -ENOMEM;
 				return 0;
 			}
 			if (!pskb_pull(orig_skb, orig_offset)) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				kfree_skb(orig_skb);
 				desc->error = -ENOMEM;
 				return 0;
@@ -140,13 +151,13 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 			orig_offset = 0;
 		}
 
-		if (!strp->rx_skb_nextp) {
+		if (!strp->skb_nextp) {
 			/* We are going to append to the frags_list of head.
 			 * Need to unshare the frag_list.
 			 */
 			err = skb_unclone(head, GFP_ATOMIC);
 			if (err) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				desc->error = err;
 				return 0;
 			}
@@ -165,20 +176,20 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 
 				skb = alloc_skb(0, GFP_ATOMIC);
 				if (!skb) {
-					STRP_STATS_INCR(strp->stats.rx_mem_fail);
+					STRP_STATS_INCR(strp->stats.mem_fail);
 					desc->error = -ENOMEM;
 					return 0;
 				}
 				skb->len = head->len;
 				skb->data_len = head->len;
 				skb->truesize = head->truesize;
-				*_strp_rx_msg(skb) = *_strp_rx_msg(head);
-				strp->rx_skb_nextp = &head->next;
+				*_strp_msg(skb) = *_strp_msg(head);
+				strp->skb_nextp = &head->next;
 				skb_shinfo(skb)->frag_list = head;
-				strp->rx_skb_head = skb;
+				strp->skb_head = skb;
 				head = skb;
 			} else {
-				strp->rx_skb_nextp =
+				strp->skb_nextp =
 				    &skb_shinfo(head)->frag_list;
 			}
 		}
@@ -188,112 +199,112 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 		/* Always clone since we will consume something */
 		skb = skb_clone(orig_skb, GFP_ATOMIC);
 		if (!skb) {
-			STRP_STATS_INCR(strp->stats.rx_mem_fail);
+			STRP_STATS_INCR(strp->stats.mem_fail);
 			desc->error = -ENOMEM;
 			break;
 		}
 
 		cand_len = orig_len - eaten;
 
-		head = strp->rx_skb_head;
+		head = strp->skb_head;
 		if (!head) {
 			head = skb;
-			strp->rx_skb_head = head;
-			/* Will set rx_skb_nextp on next packet if needed */
-			strp->rx_skb_nextp = NULL;
-			rxm = _strp_rx_msg(head);
-			memset(rxm, 0, sizeof(*rxm));
-			rxm->strp.offset = orig_offset + eaten;
+			strp->skb_head = head;
+			/* Will set skb_nextp on next packet if needed */
+			strp->skb_nextp = NULL;
+			stm = _strp_msg(head);
+			memset(stm, 0, sizeof(*stm));
+			stm->strp.offset = orig_offset + eaten;
 		} else {
 			/* Unclone since we may be appending to an skb that we
 			 * already share a frag_list with.
 			 */
 			err = skb_unclone(skb, GFP_ATOMIC);
 			if (err) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				desc->error = err;
 				break;
 			}
 
-			rxm = _strp_rx_msg(head);
-			*strp->rx_skb_nextp = skb;
-			strp->rx_skb_nextp = &skb->next;
+			stm = _strp_msg(head);
+			*strp->skb_nextp = skb;
+			strp->skb_nextp = &skb->next;
 			head->data_len += skb->len;
 			head->len += skb->len;
 			head->truesize += skb->truesize;
 		}
 
-		if (!rxm->strp.full_len) {
+		if (!stm->strp.full_len) {
 			ssize_t len;
 
 			len = (*strp->cb.parse_msg)(strp, head);
 
 			if (!len) {
 				/* Need more header to determine length */
-				if (!rxm->accum_len) {
+				if (!stm->accum_len) {
 					/* Start RX timer for new message */
-					strp_start_rx_timer(strp);
+					strp_start_timer(strp, timeo);
 				}
-				rxm->accum_len += cand_len;
+				stm->accum_len += cand_len;
 				eaten += cand_len;
-				STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+				STRP_STATS_INCR(strp->stats.need_more_hdr);
 				WARN_ON(eaten != orig_len);
 				break;
 			} else if (len < 0) {
-				if (len == -ESTRPIPE && rxm->accum_len) {
+				if (len == -ESTRPIPE && stm->accum_len) {
 					len = -ENODATA;
-					strp->rx_unrecov_intr = 1;
+					strp->unrecov_intr = 1;
 				} else {
-					strp->rx_interrupted = 1;
+					strp->interrupted = 1;
 				}
 				strp_parser_err(strp, len, desc);
 				break;
-			} else if (len > strp->sk->sk_rcvbuf) {
+			} else if (len > max_msg_size) {
 				/* Message length exceeds maximum allowed */
-				STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+				STRP_STATS_INCR(strp->stats.msg_too_big);
 				strp_parser_err(strp, -EMSGSIZE, desc);
 				break;
 			} else if (len <= (ssize_t)head->len -
-					  skb->len - rxm->strp.offset) {
+					  skb->len - stm->strp.offset) {
 				/* Length must be into new skb (and also
 				 * greater than zero)
 				 */
-				STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+				STRP_STATS_INCR(strp->stats.bad_hdr_len);
 				strp_parser_err(strp, -EPROTO, desc);
 				break;
 			}
 
-			rxm->strp.full_len = len;
+			stm->strp.full_len = len;
 		}
 
-		extra = (ssize_t)(rxm->accum_len + cand_len) -
-			rxm->strp.full_len;
+		extra = (ssize_t)(stm->accum_len + cand_len) -
+			stm->strp.full_len;
 
 		if (extra < 0) {
 			/* Message not complete yet. */
-			if (rxm->strp.full_len - rxm->accum_len >
+			if (stm->strp.full_len - stm->accum_len >
 			    strp_peek_len(strp)) {
-				/* Don't have the whole messages in the socket
-				 * buffer. Set strp->rx_need_bytes to wait for
+				/* Don't have the whole message in the socket
+				 * buffer. Set strp->need_bytes to wait for
 				 * the rest of the message. Also, set "early
 				 * eaten" since we've already buffered the skb
 				 * but don't consume yet per strp_read_sock.
 				 */
 
-				if (!rxm->accum_len) {
+				if (!stm->accum_len) {
 					/* Start RX timer for new message */
-					strp_start_rx_timer(strp);
+					strp_start_timer(strp, timeo);
 				}
 
-				strp->rx_need_bytes = rxm->strp.full_len -
-						       rxm->accum_len;
-				rxm->accum_len += cand_len;
-				rxm->early_eaten = cand_len;
-				STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+				strp->need_bytes = stm->strp.full_len -
+						       stm->accum_len;
+				stm->accum_len += cand_len;
+				stm->early_eaten = cand_len;
+				STRP_STATS_ADD(strp->stats.bytes, cand_len);
 				desc->count = 0; /* Stop reading socket */
 				break;
 			}
-			rxm->accum_len += cand_len;
+			stm->accum_len += cand_len;
 			eaten += cand_len;
 			WARN_ON(eaten != orig_len);
 			break;
@@ -308,14 +319,14 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 		eaten += (cand_len - extra);
 
 		/* Hurray, we have a new message! */
-		del_timer(&strp->rx_msg_timer);
-		strp->rx_skb_head = NULL;
-		STRP_STATS_INCR(strp->stats.rx_msgs);
+		del_timer(&strp->msg_timer);
+		strp->skb_head = NULL;
+		STRP_STATS_INCR(strp->stats.msgs);
 
 		/* Give skb to upper layer */
 		strp->cb.rcv_msg(strp, head);
 
-		if (unlikely(strp->rx_paused)) {
+		if (unlikely(strp->paused)) {
 			/* Upper layer paused strp */
 			break;
 		}
@@ -324,11 +335,33 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 	if (cloned_orig)
 		kfree_skb(orig_skb);
 
-	STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+	STRP_STATS_ADD(strp->stats.bytes, eaten);
 
 	return eaten;
 }
 
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+		 unsigned int orig_offset, size_t orig_len,
+		 size_t max_msg_size, long timeo)
+{
+	read_descriptor_t desc; /* Dummy arg to strp_recv */
+
+	desc.arg.data = strp;
+
+	return __strp_recv(&desc, orig_skb, orig_offset, orig_len,
+			   max_msg_size, timeo);
+}
+EXPORT_SYMBOL_GPL(strp_process);
+
+static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+		     unsigned int orig_offset, size_t orig_len)
+{
+	struct strparser *strp = (struct strparser *)desc->arg.data;
+
+	return __strp_recv(desc, orig_skb, orig_offset, orig_len,
+			   strp->sk->sk_rcvbuf, strp->sk->sk_rcvtimeo);
+}
+
 static int default_read_sock_done(struct strparser *strp, int err)
 {
 	return err;
@@ -355,101 +388,129 @@ static int strp_read_sock(struct strparser *strp)
 /* Lower sock lock held */
 void strp_data_ready(struct strparser *strp)
 {
-	if (unlikely(strp->rx_stopped))
+	if (unlikely(strp->stopped))
 		return;
 
-	/* This check is needed to synchronize with do_strp_rx_work.
-	 * do_strp_rx_work acquires a process lock (lock_sock) whereas
+	/* This check is needed to synchronize with do_strp_work.
+	 * do_strp_work acquires a process lock (lock_sock) whereas
 	 * the lock held here is bh_lock_sock. The two locks can be
 	 * held by different threads at the same time, but bh_lock_sock
 	 * allows a thread in BH context to safely check if the process
 	 * lock is held. In this case, if the lock is held, queue work.
 	 */
 	if (sock_owned_by_user(strp->sk)) {
-		queue_work(strp_wq, &strp->rx_work);
+		queue_work(strp_wq, &strp->work);
 		return;
 	}
 
-	if (strp->rx_paused)
+	if (strp->paused)
 		return;
 
-	if (strp->rx_need_bytes) {
-		if (strp_peek_len(strp) >= strp->rx_need_bytes)
-			strp->rx_need_bytes = 0;
+	if (strp->need_bytes) {
+		if (strp_peek_len(strp) >= strp->need_bytes)
+			strp->need_bytes = 0;
 		else
 			return;
 	}
 
 	if (strp_read_sock(strp) == -ENOMEM)
-		queue_work(strp_wq, &strp->rx_work);
+		queue_work(strp_wq, &strp->work);
 }
 EXPORT_SYMBOL_GPL(strp_data_ready);
 
-static void do_strp_rx_work(struct strparser *strp)
+static void do_strp_work(struct strparser *strp)
 {
 	read_descriptor_t rd_desc;
-	struct sock *csk = strp->sk;
 
 	/* We need the read lock to synchronize with strp_data_ready. We
 	 * need the socket lock for calling strp_read_sock.
 	 */
-	lock_sock(csk);
+	strp->cb.lock(strp);
 
-	if (unlikely(strp->rx_stopped))
+	if (unlikely(strp->stopped))
 		goto out;
 
-	if (strp->rx_paused)
+	if (strp->paused)
 		goto out;
 
 	rd_desc.arg.data = strp;
 
 	if (strp_read_sock(strp) == -ENOMEM)
-		queue_work(strp_wq, &strp->rx_work);
+		queue_work(strp_wq, &strp->work);
 
 out:
-	release_sock(csk);
+	strp->cb.unlock(strp);
 }
 
-static void strp_rx_work(struct work_struct *w)
+static void strp_work(struct work_struct *w)
 {
-	do_strp_rx_work(container_of(w, struct strparser, rx_work));
+	do_strp_work(container_of(w, struct strparser, work));
 }
 
-static void strp_rx_msg_timeout(unsigned long arg)
+static void strp_msg_timeout(unsigned long arg)
 {
 	struct strparser *strp = (struct strparser *)arg;
 
 	/* Message assembly timed out */
-	STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
-	lock_sock(strp->sk);
+	STRP_STATS_INCR(strp->stats.msg_timeouts);
+	strp->cb.lock(strp);
 	strp->cb.abort_parser(strp, ETIMEDOUT);
+	strp->cb.unlock(strp);
+}
+
+static void strp_sock_lock(struct strparser *strp)
+{
+	lock_sock(strp->sk);
+}
+
+static void strp_sock_unlock(struct strparser *strp)
+{
 	release_sock(strp->sk);
 }
 
-int strp_init(struct strparser *strp, struct sock *csk,
+int strp_init(struct strparser *strp, struct sock *sk,
 	      struct strp_callbacks *cb)
 {
-	struct socket *sock = csk->sk_socket;
 
 	if (!cb || !cb->rcv_msg || !cb->parse_msg)
 		return -EINVAL;
 
-	if (!sock->ops->read_sock || !sock->ops->peek_len)
-		return -EAFNOSUPPORT;
+	/* The sk (sock) arg determines the mode of the stream parser.
+	 *
+	 * If the sock is set then the strparser is in receive callback mode.
+	 * The upper layer calls strp_data_ready to kick receive processing
+	 * and strparser calls the read_sock function on the socket to
+	 * get packets.
+	 *
+	 * If the sock is not set then the strparser is in general mode.
+	 * The upper layer calls strp_process for each skb to be parsed.
+	 */
 
-	memset(strp, 0, sizeof(*strp));
+	if (sk) {
+		struct socket *sock = sk->sk_socket;
 
-	strp->sk = csk;
+		if (!sock->ops->read_sock || !sock->ops->peek_len)
+			return -EAFNOSUPPORT;
+	} else {
+		if (!cb->lock || !cb->unlock)
+			return -EINVAL;
+	}
 
-	setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
-		    (unsigned long)strp);
+	memset(strp, 0, sizeof(*strp));
 
-	INIT_WORK(&strp->rx_work, strp_rx_work);
+	strp->sk = sk;
 
+	strp->cb.lock = cb->lock ? : strp_sock_lock;
+	strp->cb.unlock = cb->unlock ? : strp_sock_unlock;
 	strp->cb.rcv_msg = cb->rcv_msg;
 	strp->cb.parse_msg = cb->parse_msg;
 	strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
-	strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+	strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;
+
+	setup_timer(&strp->msg_timer, strp_msg_timeout,
+		    (unsigned long)strp);
+
+	INIT_WORK(&strp->work, strp_work);
 
 	return 0;
 }
@@ -457,12 +518,12 @@ EXPORT_SYMBOL_GPL(strp_init);
 
 void strp_unpause(struct strparser *strp)
 {
-	strp->rx_paused = 0;
+	strp->paused = 0;
 
-	/* Sync setting rx_paused with RX work */
+	/* Sync setting paused with RX work */
 	smp_mb();
 
-	queue_work(strp_wq, &strp->rx_work);
+	queue_work(strp_wq, &strp->work);
 }
 EXPORT_SYMBOL_GPL(strp_unpause);
 
@@ -471,27 +532,27 @@ EXPORT_SYMBOL_GPL(strp_unpause);
  */
 void strp_done(struct strparser *strp)
 {
-	WARN_ON(!strp->rx_stopped);
+	WARN_ON(!strp->stopped);
 
-	del_timer_sync(&strp->rx_msg_timer);
-	cancel_work_sync(&strp->rx_work);
+	del_timer_sync(&strp->msg_timer);
+	cancel_work_sync(&strp->work);
 
-	if (strp->rx_skb_head) {
-		kfree_skb(strp->rx_skb_head);
-		strp->rx_skb_head = NULL;
+	if (strp->skb_head) {
+		kfree_skb(strp->skb_head);
+		strp->skb_head = NULL;
 	}
 }
 EXPORT_SYMBOL_GPL(strp_done);
 
 void strp_stop(struct strparser *strp)
 {
-	strp->rx_stopped = 1;
+	strp->stopped = 1;
 }
 EXPORT_SYMBOL_GPL(strp_stop);
 
 void strp_check_rcv(struct strparser *strp)
 {
-	queue_work(strp_wq, &strp->rx_work);
+	queue_work(strp_wq, &strp->work);
 }
 EXPORT_SYMBOL_GPL(strp_check_rcv);
 
-- 
cgit v1.2.3


From 46587e4a312780a63132483bc8678c397eca6aff Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Tue, 1 Aug 2017 16:32:39 -0400
Subject: net: dsa: remove PHY device argument from .set_eee

The DSA switch operations for EEE are only meant to configure a port's
MAC EEE settings. The port's PHY EEE settings are accessed by the DSA
layer and must be made available via a proper PHY driver.

In order to reduce this confusion, remove the phy_device argument from
the .set_eee operation.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c        |  1 -
 drivers/net/dsa/mv88e6xxx/chip.c |  2 +-
 drivers/net/dsa/qca8k.c          | 14 +++-----------
 include/net/dsa.h                |  1 -
 net/dsa/slave.c                  |  2 +-
 5 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 9d10aac8f241..ce886345d8d2 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -353,7 +353,6 @@ static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
 }
 
 static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port,
-			      struct phy_device *phydev,
 			      struct ethtool_eee *e)
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 647d5d45c1d6..aaa96487f21f 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -850,7 +850,7 @@ static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
 }
 
 static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
-			     struct phy_device *phydev, struct ethtool_eee *e)
+			     struct ethtool_eee *e)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index bfe0172ae6cc..e209e229ed4c 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -637,8 +637,8 @@ qca8k_get_sset_count(struct dsa_switch *ds)
 	return ARRAY_SIZE(ar8327_mib);
 }
 
-static void
-qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
+static int
+qca8k_set_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port);
@@ -646,20 +646,12 @@ qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
 
 	mutex_lock(&priv->reg_mutex);
 	reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL);
-	if (enable)
+	if (eee->eee_enabled)
 		reg |= lpi_en;
 	else
 		reg &= ~lpi_en;
 	qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg);
 	mutex_unlock(&priv->reg_mutex);
-}
-
-static int
-qca8k_set_eee(struct dsa_switch *ds, int port,
-	      struct phy_device *phydev,
-	      struct ethtool_eee *e)
-{
-	qca8k_eee_enable_set(ds, port, e->eee_enabled);
 
 	return 0;
 }
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 88da272d20d0..ce46db323394 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -335,7 +335,6 @@ struct dsa_switch_ops {
 	 * EEE setttings
 	 */
 	int	(*set_eee)(struct dsa_switch *ds, int port,
-			   struct phy_device *phydev,
 			   struct ethtool_eee *e);
 	int	(*get_eee)(struct dsa_switch *ds, int port,
 			   struct ethtool_eee *e);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ad5caaf384d7..9ddc584e70b0 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -655,7 +655,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
 	if (!ds->ops->set_eee)
 		return -EOPNOTSUPP;
 
-	ret = ds->ops->set_eee(ds, p->dp->index, p->phy, e);
+	ret = ds->ops->set_eee(ds, p->dp->index, e);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From 08f500610f39809c107f206cba1f799c98c38054 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Tue, 1 Aug 2017 16:32:41 -0400
Subject: net: dsa: rename switch EEE ops

To avoid confusion with the PHY EEE settings, rename the .set_eee and
.get_eee ops to respectively .set_mac_eee and .get_mac_eee.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c        | 12 ++++++------
 drivers/net/dsa/mv88e6xxx/chip.c | 12 ++++++------
 drivers/net/dsa/qca8k.c          |  9 ++++-----
 include/net/dsa.h                | 10 +++++-----
 net/dsa/slave.c                  |  8 ++++----
 5 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index ce886345d8d2..6bbfa6ea1efb 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -338,8 +338,8 @@ static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
 	return 1;
 }
 
-static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
-			      struct ethtool_eee *e)
+static int bcm_sf2_sw_get_mac_eee(struct dsa_switch *ds, int port,
+				  struct ethtool_eee *e)
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_eee *p = &priv->port_sts[port].eee;
@@ -352,8 +352,8 @@ static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port,
-			      struct ethtool_eee *e)
+static int bcm_sf2_sw_set_mac_eee(struct dsa_switch *ds, int port,
+				  struct ethtool_eee *e)
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_eee *p = &priv->port_sts[port].eee;
@@ -1011,8 +1011,8 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
 	.set_wol		= bcm_sf2_sw_set_wol,
 	.port_enable		= bcm_sf2_port_setup,
 	.port_disable		= bcm_sf2_port_disable,
-	.get_eee		= bcm_sf2_sw_get_eee,
-	.set_eee		= bcm_sf2_sw_set_eee,
+	.get_mac_eee		= bcm_sf2_sw_get_mac_eee,
+	.set_mac_eee		= bcm_sf2_sw_set_mac_eee,
 	.port_bridge_join	= b53_br_join,
 	.port_bridge_leave	= b53_br_leave,
 	.port_stp_state_set	= b53_br_set_stp_state,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index aa0c5493fb9d..521738c4cd17 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -810,15 +810,15 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
 	mutex_unlock(&chip->reg_lock);
 }
 
-static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
-			     struct ethtool_eee *e)
+static int mv88e6xxx_get_mac_eee(struct dsa_switch *ds, int port,
+				 struct ethtool_eee *e)
 {
 	/* Nothing to do on the port's MAC */
 	return 0;
 }
 
-static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
-			     struct ethtool_eee *e)
+static int mv88e6xxx_set_mac_eee(struct dsa_switch *ds, int port,
+				 struct ethtool_eee *e)
 {
 	/* Nothing to do on the port's MAC */
 	return 0;
@@ -3890,8 +3890,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
 	.get_sset_count		= mv88e6xxx_get_sset_count,
 	.port_enable		= mv88e6xxx_port_enable,
 	.port_disable		= mv88e6xxx_port_disable,
-	.set_eee		= mv88e6xxx_set_eee,
-	.get_eee		= mv88e6xxx_get_eee,
+	.get_mac_eee		= mv88e6xxx_get_mac_eee,
+	.set_mac_eee		= mv88e6xxx_set_mac_eee,
 	.get_eeprom_len		= mv88e6xxx_get_eeprom_len,
 	.get_eeprom		= mv88e6xxx_get_eeprom,
 	.set_eeprom		= mv88e6xxx_set_eeprom,
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index e209e229ed4c..36c169b0c705 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -638,7 +638,7 @@ qca8k_get_sset_count(struct dsa_switch *ds)
 }
 
 static int
-qca8k_set_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
+qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port);
@@ -657,8 +657,7 @@ qca8k_set_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
 }
 
 static int
-qca8k_get_eee(struct dsa_switch *ds, int port,
-	      struct ethtool_eee *e)
+qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e)
 {
 	/* Nothing to do on the port's MAC */
 	return 0;
@@ -863,8 +862,8 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
 	.phy_write		= qca8k_phy_write,
 	.get_ethtool_stats	= qca8k_get_ethtool_stats,
 	.get_sset_count		= qca8k_get_sset_count,
-	.get_eee		= qca8k_get_eee,
-	.set_eee		= qca8k_set_eee,
+	.get_mac_eee		= qca8k_get_mac_eee,
+	.set_mac_eee		= qca8k_set_mac_eee,
 	.port_enable		= qca8k_port_enable,
 	.port_disable		= qca8k_port_disable,
 	.port_stp_state_set	= qca8k_port_stp_state_set,
diff --git a/include/net/dsa.h b/include/net/dsa.h
index ce46db323394..0b1a0622b33c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -332,12 +332,12 @@ struct dsa_switch_ops {
 				struct phy_device *phy);
 
 	/*
-	 * EEE setttings
+	 * Port's MAC EEE settings
 	 */
-	int	(*set_eee)(struct dsa_switch *ds, int port,
-			   struct ethtool_eee *e);
-	int	(*get_eee)(struct dsa_switch *ds, int port,
-			   struct ethtool_eee *e);
+	int	(*set_mac_eee)(struct dsa_switch *ds, int port,
+			       struct ethtool_eee *e);
+	int	(*get_mac_eee)(struct dsa_switch *ds, int port,
+			       struct ethtool_eee *e);
 
 	/* EEPROM access */
 	int	(*get_eeprom_len)(struct dsa_switch *ds);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9ddc584e70b0..cc4bad3dadb4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -652,10 +652,10 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
 	if (!p->phy)
 		return -ENODEV;
 
-	if (!ds->ops->set_eee)
+	if (!ds->ops->set_mac_eee)
 		return -EOPNOTSUPP;
 
-	ret = ds->ops->set_eee(ds, p->dp->index, e);
+	ret = ds->ops->set_mac_eee(ds, p->dp->index, e);
 	if (ret)
 		return ret;
 
@@ -678,10 +678,10 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
 	if (!p->phy)
 		return -ENODEV;
 
-	if (!ds->ops->get_eee)
+	if (!ds->ops->get_mac_eee)
 		return -EOPNOTSUPP;
 
-	ret = ds->ops->get_eee(ds, p->dp->index, e);
+	ret = ds->ops->get_mac_eee(ds, p->dp->index, e);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From ffdb5211da1c20354f1b40c204b6cf6c29c68161 Mon Sep 17 00:00:00 2001
From: Ilan Tayari <ilant@mellanox.com>
Date: Tue, 1 Aug 2017 12:49:08 +0300
Subject: xfrm: Auto-load xfrm offload modules

IPSec crypto offload depends on the protocol-specific
offload module (such as esp_offload.ko).

When the user installs an SA with crypto-offload, load
the offload module automatically, in the same way
that the protocol module is loaded (such as esp.ko)

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h      |  4 +++-
 net/ipv4/esp4_offload.c |  1 +
 net/ipv6/esp6_offload.c |  1 +
 net/xfrm/xfrm_device.c  |  2 +-
 net/xfrm/xfrm_state.c   | 16 ++++++++++++----
 net/xfrm/xfrm_user.c    |  2 +-
 6 files changed, 19 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index afb4929d7232..5a360100136c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -43,6 +43,8 @@
 	MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap))
 #define MODULE_ALIAS_XFRM_TYPE(family, proto) \
 	MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto))
+#define MODULE_ALIAS_XFRM_OFFLOAD_TYPE(family, proto) \
+	MODULE_ALIAS("xfrm-offload-" __stringify(family) "-" __stringify(proto))
 
 #ifdef CONFIG_XFRM_STATISTICS
 #define XFRM_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
@@ -1558,7 +1560,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
 u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
 int xfrm_init_replay(struct xfrm_state *x);
 int xfrm_state_mtu(struct xfrm_state *x, int mtu);
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay);
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
 int xfrm_init_state(struct xfrm_state *x);
 int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 05831dea00f4..aca1c85f0795 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -305,3 +305,4 @@ module_init(esp4_offload_init);
 module_exit(esp4_offload_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index eec3add177fe..8d4e2ba9163d 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -334,3 +334,4 @@ module_init(esp6_offload_init);
 module_exit(esp6_offload_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5cd7a244e88d..1904127f5fb8 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -63,7 +63,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 	xfrm_address_t *daddr;
 
 	if (!x->type_offload)
-		return 0;
+		return -EINVAL;
 
 	/* We don't yet support UDP encapsulation, TFC padding and ESN. */
 	if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 82cbbce69b79..a41e2ef789c0 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -296,12 +296,14 @@ int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
 }
 EXPORT_SYMBOL(xfrm_unregister_type_offload);
 
-static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family)
+static const struct xfrm_type_offload *
+xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
 {
 	struct xfrm_state_afinfo *afinfo;
 	const struct xfrm_type_offload **typemap;
 	const struct xfrm_type_offload *type;
 
+retry:
 	afinfo = xfrm_state_get_afinfo(family);
 	if (unlikely(afinfo == NULL))
 		return NULL;
@@ -311,6 +313,12 @@ static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned
 	if ((type && !try_module_get(type->owner)))
 		type = NULL;
 
+	if (!type && try_load) {
+		request_module("xfrm-offload-%d-%d", family, proto);
+		try_load = 0;
+		goto retry;
+	}
+
 	rcu_read_unlock();
 	return type;
 }
@@ -2165,7 +2173,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 	return mtu - x->props.header_len;
 }
 
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 {
 	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_mode *inner_mode;
@@ -2230,7 +2238,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 	if (x->type == NULL)
 		goto error;
 
-	x->type_offload = xfrm_get_type_offload(x->id.proto, family);
+	x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
 
 	err = x->type->init_state(x);
 	if (err)
@@ -2258,7 +2266,7 @@ EXPORT_SYMBOL(__xfrm_init_state);
 
 int xfrm_init_state(struct xfrm_state *x)
 {
-	return __xfrm_init_state(x, true);
+	return __xfrm_init_state(x, true, false);
 }
 
 EXPORT_SYMBOL(xfrm_init_state);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 1b539b7dcfab..ffe8d5ef09eb 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -584,7 +584,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_mark_get(attrs, &x->mark);
 
-	err = __xfrm_init_state(x, false);
+	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
 	if (err)
 		goto error;
 
-- 
cgit v1.2.3


From f70f250a77313b542531e1ff7a449cd0ccd83ec0 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 1 Aug 2017 12:49:10 +0300
Subject: net: Allow IPsec GSO for local sockets

This patch allows local sockets to make use of XFRM GSO code path.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
---
 include/net/xfrm.h | 19 +++++++++++++++++++
 net/core/sock.c    |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5a360100136c..18d7de34a5c3 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1858,6 +1858,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 		       struct xfrm_user_offload *xuo);
 bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
 
+static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
+{
+	struct xfrm_state *x = dst->xfrm;
+
+	if (!x || !x->type_offload)
+		return false;
+
+	if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) &&
+	    !dst->child->xfrm)
+		return true;
+
+	return false;
+}
+
 static inline void xfrm_dev_state_delete(struct xfrm_state *x)
 {
 	struct xfrm_state_offload *xso = &x->xso;
@@ -1900,6 +1914,11 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x
 {
 	return false;
 }
+
+static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
+{
+	return false;
+}
 #endif
 
 static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m)
diff --git a/net/core/sock.c b/net/core/sock.c
index 742f68c9c84a..564f835f408a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1757,7 +1757,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
 	if (sk_can_gso(sk)) {
-		if (dst->header_len) {
+		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 		} else {
 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
-- 
cgit v1.2.3


From 2a04aabf5c96c9e25df488949b21223bcc623815 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Tue, 1 Aug 2017 12:25:01 +0200
Subject: netfilter: constify nf_conntrack_l3/4proto parameters

When a nf_conntrack_l3/4proto parameter is not on the left hand side
of an assignment, its address is not taken, and it is not passed to a
function that may modify its fields, then it can be declared as const.

This change is useful from a documentation point of view, and can
possibly facilitate making some nf_conntrack_l3/4proto structures const
subsequently.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 14 +++++++-------
 include/net/netfilter/nf_conntrack_timeout.h |  2 +-
 net/netfilter/nf_conntrack_core.c            |  8 ++++----
 net/netfilter/nf_conntrack_netlink.c         |  6 +++---
 net/netfilter/nf_conntrack_proto.c           | 24 ++++++++++++------------
 net/netfilter/nfnetlink_cttimeout.c          |  5 +++--
 6 files changed, 30 insertions(+), 29 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7032e044bbe2..b6e27cafb1d9 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -125,23 +125,23 @@ struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
 
 struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
 						    u_int8_t l4proto);
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p);
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
 
 /* Protocol pernet registration. */
 int nf_ct_l4proto_pernet_register_one(struct net *net,
-				      struct nf_conntrack_l4proto *proto);
+				const struct nf_conntrack_l4proto *proto);
 void nf_ct_l4proto_pernet_unregister_one(struct net *net,
-					 struct nf_conntrack_l4proto *proto);
+				const struct nf_conntrack_l4proto *proto);
 int nf_ct_l4proto_pernet_register(struct net *net,
-				  struct nf_conntrack_l4proto *proto[],
+				  struct nf_conntrack_l4proto *const proto[],
 				  unsigned int num_proto);
 void nf_ct_l4proto_pernet_unregister(struct net *net,
-				     struct nf_conntrack_l4proto *proto[],
-				     unsigned int num_proto);
+				struct nf_conntrack_l4proto *const proto[],
+				unsigned int num_proto);
 
 /* Protocol global registration. */
 int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto);
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto);
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
 int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[],
 			   unsigned int num_proto);
 void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[],
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index d40b89355fdd..b222957062b5 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -68,7 +68,7 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
 
 static inline unsigned int *
 nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
-		     struct nf_conntrack_l4proto *l4proto)
+		     const struct nf_conntrack_l4proto *l4proto)
 {
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	struct nf_conn_timeout *timeout_ext;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2bc499186186..f2f00eaf217d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1176,8 +1176,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 static noinline struct nf_conntrack_tuple_hash *
 init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       const struct nf_conntrack_tuple *tuple,
-	       struct nf_conntrack_l3proto *l3proto,
-	       struct nf_conntrack_l4proto *l4proto,
+	       const struct nf_conntrack_l3proto *l3proto,
+	       const struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
 	       unsigned int dataoff, u32 hash)
 {
@@ -1288,8 +1288,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 		  unsigned int dataoff,
 		  u_int16_t l3num,
 		  u_int8_t protonum,
-		  struct nf_conntrack_l3proto *l3proto,
-		  struct nf_conntrack_l4proto *l4proto)
+		  const struct nf_conntrack_l3proto *l3proto,
+		  const struct nf_conntrack_l4proto *l4proto)
 {
 	const struct nf_conntrack_zone *zone;
 	struct nf_conntrack_tuple tuple;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4922c8aefb2a..f4ca48817f66 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -61,8 +61,8 @@ MODULE_LICENSE("GPL");
 static char __initdata version[] = "0.93";
 
 static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
-				       const struct nf_conntrack_tuple *tuple,
-				       struct nf_conntrack_l4proto *l4proto)
+				const struct nf_conntrack_tuple *tuple,
+				const struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 	struct nlattr *nest_parms;
@@ -86,7 +86,7 @@ nla_put_failure:
 
 static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
 				    const struct nf_conntrack_tuple *tuple,
-				    struct nf_conntrack_l3proto *l3proto)
+				    const struct nf_conntrack_l3proto *l3proto)
 {
 	int ret = 0;
 	struct nlattr *nest_parms;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 7c89dade6fd3..27810cf816a6 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -188,7 +188,7 @@ nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
 
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
 {
 	module_put(p->me);
 }
@@ -257,7 +257,7 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
 
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
-					      struct nf_conntrack_l4proto *l4proto)
+				const struct nf_conntrack_l4proto *l4proto)
 {
 	if (l4proto->get_net_proto) {
 		/* statically built-in protocols use static per-net */
@@ -272,7 +272,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
 static
 int nf_ct_l4proto_register_sysctl(struct net *net,
 				  struct nf_proto_net *pn,
-				  struct nf_conntrack_l4proto *l4proto)
+				  const struct nf_conntrack_l4proto *l4proto)
 {
 	int err = 0;
 
@@ -295,8 +295,8 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
 
 static
 void nf_ct_l4proto_unregister_sysctl(struct net *net,
-				     struct nf_proto_net *pn,
-				     struct nf_conntrack_l4proto *l4proto)
+				struct nf_proto_net *pn,
+				const struct nf_conntrack_l4proto *l4proto)
 {
 #ifdef CONFIG_SYSCTL
 	if (pn->ctl_table_header != NULL)
@@ -366,7 +366,7 @@ out_unlock:
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
 
 int nf_ct_l4proto_pernet_register_one(struct net *net,
-				      struct nf_conntrack_l4proto *l4proto)
+				const struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 	struct nf_proto_net *pn = NULL;
@@ -391,7 +391,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
 
-static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
 
 {
 	BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
@@ -404,7 +404,7 @@ static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
 			   &nf_conntrack_l4proto_generic);
 }
 
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
 {
 	mutex_lock(&nf_ct_proto_mutex);
 	__nf_ct_l4proto_unregister_one(l4proto);
@@ -415,7 +415,7 @@ void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
 
 void nf_ct_l4proto_pernet_unregister_one(struct net *net,
-					 struct nf_conntrack_l4proto *l4proto)
+				const struct nf_conntrack_l4proto *l4proto)
 {
 	struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
 
@@ -449,7 +449,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
 
 int nf_ct_l4proto_pernet_register(struct net *net,
-				  struct nf_conntrack_l4proto *l4proto[],
+				  struct nf_conntrack_l4proto *const l4proto[],
 				  unsigned int num_proto)
 {
 	int ret = -EINVAL;
@@ -485,8 +485,8 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
 
 void nf_ct_l4proto_pernet_unregister(struct net *net,
-				     struct nf_conntrack_l4proto *l4proto[],
-				     unsigned int num_proto)
+				struct nf_conntrack_l4proto *const l4proto[],
+				unsigned int num_proto)
 {
 	while (num_proto-- != 0)
 		nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index f4fb6d4dd0b9..fcabccc99f0d 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -47,7 +47,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
 };
 
 static int
-ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
+ctnl_timeout_parse_policy(void *timeouts,
+			  const struct nf_conntrack_l4proto *l4proto,
 			  struct net *net, const struct nlattr *attr)
 {
 	int ret = 0;
@@ -401,7 +402,7 @@ err:
 static int
 cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 			    u32 seq, u32 type, int event,
-			    struct nf_conntrack_l4proto *l4proto)
+			    const struct nf_conntrack_l4proto *l4proto)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-- 
cgit v1.2.3


From 2202e35d47fff379fc744e6bd3c111b018cc77df Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 2 Aug 2017 09:56:06 +0200
Subject: ipv4: fib: Remove unused functions

Previous patches converted users of these functions to provide offload
indication using the nexthop's flags instead of the FIB info's.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 41d580c6185f..ef8992d49bc3 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -124,7 +124,6 @@ struct fib_info {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			fib_weight;
 #endif
-	unsigned int		fib_offload_cnt;
 	struct rcu_head		rcu;
 	struct fib_nh		fib_nh[0];
 #define fib_dev		fib_nh[0].nh_dev
@@ -177,18 +176,6 @@ struct fib_result_nl {
 
 __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
 
-static inline void fib_info_offload_inc(struct fib_info *fi)
-{
-	fi->fib_offload_cnt++;
-	fi->fib_flags |= RTNH_F_OFFLOAD;
-}
-
-static inline void fib_info_offload_dec(struct fib_info *fi)
-{
-	if (--fi->fib_offload_cnt == 0)
-		fi->fib_flags &= ~RTNH_F_OFFLOAD;
-}
-
 #define FIB_RES_SADDR(net, res)				\
 	((FIB_RES_NH(res).nh_saddr_genid ==		\
 	  atomic_read(&(net)->ipv4.dev_addr_genid)) ?	\
-- 
cgit v1.2.3


From d8238d9dab8fbea22dd04f4e77639c7f7b83eef7 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 3 Aug 2017 15:42:11 +0800
Subject: sctp: remove the typedef sctp_errhdr_t

This patch is to remove the typedef sctp_errhdr_t, and replace
with struct sctp_errhdr in the places where it's using this
typedef.

It is also to use sizeof(variable) instead of sizeof(type).

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h     |  6 +++---
 include/net/sctp/sctp.h  |  8 ++++----
 net/sctp/sm_make_chunk.c | 38 ++++++++++++++++++++------------------
 net/sctp/sm_sideeffect.c |  2 +-
 net/sctp/sm_statefuns.c  | 29 +++++++++++++++--------------
 net/sctp/ulpevent.c      | 10 +++++-----
 6 files changed, 48 insertions(+), 45 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 553020cbf6f7..d35bdd30fa0f 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -427,15 +427,15 @@ struct sctp_shutdown_chunk {
 
 /* RFC 2960.  Section 3.3.10 Operation Error (ERROR) (9) */
 
-typedef struct sctp_errhdr {
+struct sctp_errhdr {
 	__be16 cause;
 	__be16 length;
 	__u8  variable[0];
-} sctp_errhdr_t;
+};
 
 typedef struct sctp_operr_chunk {
 	struct sctp_chunkhdr chunk_hdr;
-	sctp_errhdr_t err_hdr;
+	struct sctp_errhdr err_hdr;
 } sctp_operr_chunk_t;
 
 /* RFC 2960 3.3.10 - Operation Error
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 45fd4c6056b5..84650fed1e6a 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -479,13 +479,13 @@ for (pos.v = chunk->member;\
 _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
 
 #define _sctp_walk_errors(err, chunk_hdr, end)\
-for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
+for (err = (struct sctp_errhdr *)((void *)chunk_hdr + \
 	    sizeof(struct sctp_chunkhdr));\
-     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\
+     ((void *)err + offsetof(struct sctp_errhdr, length) + sizeof(err->length) <=\
       (void *)chunk_hdr + end) &&\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
-     ntohs(err->length) >= sizeof(sctp_errhdr_t); \
-     err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
+     ntohs(err->length) >= sizeof(struct sctp_errhdr); \
+     err = (struct sctp_errhdr *)((void *)err + SCTP_PAD4(ntohs(err->length))))
 
 #define sctp_walk_fwdtsn(pos, chunk)\
 _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8f1c6b639ac1..0b2298bbb84e 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -135,14 +135,14 @@ static const struct sctp_paramhdr prsctp_param = {
 void  sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
 		      size_t paylen)
 {
-	sctp_errhdr_t err;
+	struct sctp_errhdr err;
 	__u16 len;
 
 	/* Cause code constants are now defined in network order.  */
 	err.cause = cause_code;
-	len = sizeof(sctp_errhdr_t) + paylen;
+	len = sizeof(err) + paylen;
 	err.length  = htons(len);
-	chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
+	chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err);
 }
 
 /* A helper to initialize an op error inside a
@@ -153,19 +153,19 @@ void  sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
 static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
 		      size_t paylen)
 {
-	sctp_errhdr_t err;
+	struct sctp_errhdr err;
 	__u16 len;
 
 	/* Cause code constants are now defined in network order.  */
 	err.cause = cause_code;
-	len = sizeof(sctp_errhdr_t) + paylen;
+	len = sizeof(err) + paylen;
 	err.length  = htons(len);
 
 	if (skb_tailroom(chunk->skb) < len)
 		return -ENOSPC;
-	chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
-						     sizeof(sctp_errhdr_t),
-						     &err);
+
+	chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err);
+
 	return 0;
 }
 /* 3.3.2 Initiation (INIT) (1)
@@ -979,8 +979,8 @@ struct sctp_chunk *sctp_make_abort_no_data(
 	struct sctp_chunk *retval;
 	__be32 payload;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t)
-				 + sizeof(tsn));
+	retval = sctp_make_abort(asoc, chunk,
+				 sizeof(struct sctp_errhdr) + sizeof(tsn));
 
 	if (!retval)
 		goto no_mem;
@@ -1015,7 +1015,8 @@ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
 	void *payload = NULL;
 	int err;
 
-	retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen);
+	retval = sctp_make_abort(asoc, NULL,
+				 sizeof(struct sctp_errhdr) + paylen);
 	if (!retval)
 		goto err_chunk;
 
@@ -1080,8 +1081,8 @@ struct sctp_chunk *sctp_make_abort_violation(
 	struct sctp_chunk  *retval;
 	struct sctp_paramhdr phdr;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen +
-					      sizeof(phdr));
+	retval = sctp_make_abort(asoc, chunk, sizeof(struct sctp_errhdr) +
+					      paylen + sizeof(phdr));
 	if (!retval)
 		goto end;
 
@@ -1104,7 +1105,7 @@ struct sctp_chunk *sctp_make_violation_paramlen(
 {
 	struct sctp_chunk *retval;
 	static const char error[] = "The following parameter had invalid length:";
-	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
+	size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr) +
 			     sizeof(*param);
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
@@ -1126,7 +1127,7 @@ struct sctp_chunk *sctp_make_violation_max_retrans(
 {
 	struct sctp_chunk *retval;
 	static const char error[] = "Association exceeded its max_retans count";
-	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t);
+	size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr);
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
 	if (!retval)
@@ -1209,7 +1210,8 @@ static struct sctp_chunk *sctp_make_op_error_space(
 	struct sctp_chunk *retval;
 
 	retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
-				   sizeof(sctp_errhdr_t) + size, GFP_ATOMIC);
+				   sizeof(struct sctp_errhdr) + size,
+				   GFP_ATOMIC);
 	if (!retval)
 		goto nodata;
 
@@ -2966,7 +2968,7 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
 			      __be16 err_code, sctp_addip_param_t *asconf_param)
 {
 	sctp_addip_param_t 	ack_param;
-	sctp_errhdr_t		err_param;
+	struct sctp_errhdr	err_param;
 	int			asconf_param_len = 0;
 	int			err_param_len = 0;
 	__be16			response_type;
@@ -3351,7 +3353,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 				      int no_err)
 {
 	sctp_addip_param_t	*asconf_ack_param;
-	sctp_errhdr_t		*err_param;
+	struct sctp_errhdr	*err_param;
 	int			length;
 	int			asconf_ack_len;
 	__be16			err_code;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d6e5e9e0fd6d..5dda8c42b5f6 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -828,7 +828,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
 	if (!sctp_assoc_update(asoc, new))
 		return;
 
-	abort = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t));
+	abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
 	if (abort) {
 		sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
 		sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d4d8fab52a11..7a2ba4c187d0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1233,7 +1233,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 	union sctp_addr_param *addrparm;
 	struct sctp_errhdr *errhdr;
 	struct sctp_endpoint *ep;
-	char buffer[sizeof(struct sctp_errhdr)+sizeof(union sctp_addr_param)];
+	char buffer[sizeof(*errhdr) + sizeof(*addrparm)];
 	struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
 
 	/* Build the error on the stack.   We are way to malloc crazy
@@ -1244,7 +1244,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 
 	/* Copy into a parm format. */
 	len = af->to_addr_param(ssa, addrparm);
-	len += sizeof(sctp_errhdr_t);
+	len += sizeof(*errhdr);
 
 	errhdr->cause = SCTP_ERROR_RESTART;
 	errhdr->length = htons(len);
@@ -2270,7 +2270,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_errhdr_t *err;
+	struct sctp_errhdr *err;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2337,7 +2337,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 	struct sctp_chunk *chunk = arg, *reply;
 	struct sctp_cookie_preserve_param bht;
 	struct sctp_bind_addr *bp;
-	sctp_errhdr_t *err;
+	struct sctp_errhdr *err;
 	u32 stale;
 
 	if (attempts > asoc->max_init_attempts) {
@@ -2348,7 +2348,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 		return SCTP_DISPOSITION_DELETE_TCB;
 	}
 
-	err = (sctp_errhdr_t *)(chunk->skb->data);
+	err = (struct sctp_errhdr *)(chunk->skb->data);
 
 	/* When calculating the time extension, an implementation
 	 * SHOULD use the RTT information measured based on the
@@ -2364,7 +2364,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 	 * to give ample time to retransmit the new cookie and thus
 	 * yield a higher probability of success on the reattempt.
 	 */
-	stale = ntohl(*(__be32 *)((u8 *)err + sizeof(sctp_errhdr_t)));
+	stale = ntohl(*(__be32 *)((u8 *)err + sizeof(*err)));
 	stale = (stale * 2) / 1000;
 
 	bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE;
@@ -2499,13 +2499,14 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
 	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
+		struct sctp_errhdr *err;
 
-		sctp_errhdr_t *err;
 		sctp_walk_errors(err, chunk->chunk_hdr);
 		if ((void *)err != (void *)chunk->chunk_end)
-			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg,
+						commands);
 
-		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+		error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
 	}
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
@@ -2552,7 +2553,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
 	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
-		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+		error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
 
 	return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc,
 				      chunk->transport);
@@ -3310,7 +3311,7 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_errhdr_t *err;
+	struct sctp_errhdr *err;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -3433,7 +3434,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
 	struct sctp_chunkhdr *ch;
-	sctp_errhdr_t *err;
+	struct sctp_errhdr *err;
 	__u8 *ch_end;
 	int ootb_shut_ack = 0;
 	int ootb_cookie_ack = 0;
@@ -3776,7 +3777,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 	if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) &&
 	    !(asoc->addip_last_asconf)) {
 		abort = sctp_make_abort(asoc, asconf_ack,
-					sizeof(sctp_errhdr_t));
+					sizeof(struct sctp_errhdr));
 		if (abort) {
 			sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, 0);
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -3812,7 +3813,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 		}
 
 		abort = sctp_make_abort(asoc, asconf_ack,
-					sizeof(sctp_errhdr_t));
+					sizeof(struct sctp_errhdr));
 		if (abort) {
 			sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 5f86c5062a98..67abc0194f30 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -371,19 +371,19 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
 				struct sctp_chunk *chunk, __u16 flags,
 				gfp_t gfp)
 {
-	struct sctp_ulpevent *event;
 	struct sctp_remote_error *sre;
+	struct sctp_ulpevent *event;
+	struct sctp_errhdr *ch;
 	struct sk_buff *skb;
-	sctp_errhdr_t *ch;
 	__be16 cause;
 	int elen;
 
-	ch = (sctp_errhdr_t *)(chunk->skb->data);
+	ch = (struct sctp_errhdr *)(chunk->skb->data);
 	cause = ch->cause;
-	elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+	elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(*ch);
 
 	/* Pull off the ERROR header.  */
-	skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
+	skb_pull(chunk->skb, sizeof(*ch));
 
 	/* Copy the skb to a new skb with room for us to prepend
 	 * notification with.
-- 
cgit v1.2.3


From 04b1d4e50e82536c12da00ee04a77510c459c844 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 3 Aug 2017 13:28:11 +0200
Subject: net: core: Make the FIB notification chain generic

The FIB notification chain is currently soley used by IPv4 code.
However, we're going to introduce IPv6 FIB offload support, which
requires these notification as well.

As explained in commit c3852ef7f2f8 ("ipv4: fib: Replay events when
registering FIB notifier"), upon registration to the chain, the callee
receives a full dump of the FIB tables and rules by traversing all the
net namespaces. The integrity of the dump is ensured by a per-namespace
sequence counter that is incremented whenever a change to the tables or
rules occurs.

In order to allow more address families to use the chain, each family is
expected to register its fib_notifier_ops in its pernet init. These
operations allow the common code to read the family's sequence counter
as well as dump its tables and rules in the given net namespace.

Additionally, a 'family' parameter is added to sent notifications, so
that listeners could distinguish between the different families.

Implement the common code that allows listeners to register to the chain
and for address families to register their fib_notifier_ops. Subsequent
patches will implement these operations in IPv6.

In the future, ipmr and ip6mr will be extended to provide these
notifications as well.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |   1 +
 drivers/net/ethernet/rocker/rocker_main.c          |   1 +
 include/net/fib_notifier.h                         |  44 ++++++
 include/net/ip_fib.h                               |  30 +---
 include/net/net_namespace.h                        |   1 +
 include/net/netns/ipv4.h                           |   1 +
 net/core/Makefile                                  |   3 +-
 net/core/fib_notifier.c                            | 164 +++++++++++++++++++++
 net/ipv4/fib_frontend.c                            |  17 ++-
 net/ipv4/fib_notifier.c                            |  94 +++++-------
 net/ipv4/fib_rules.c                               |   5 +-
 net/ipv4/fib_semantics.c                           |   9 +-
 net/ipv4/fib_trie.c                                |   5 +-
 13 files changed, 282 insertions(+), 93 deletions(-)
 create mode 100644 include/net/fib_notifier.h
 create mode 100644 net/core/fib_notifier.c

(limited to 'include/net')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2f03c7e71584..b79f9b67f285 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -53,6 +53,7 @@
 #include <net/addrconf.h>
 #include <net/ndisc.h>
 #include <net/ipv6.h>
+#include <net/fib_notifier.h>
 
 #include "spectrum.h"
 #include "core.h"
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index b1e5c07099fa..ef38c1a41bdd 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -34,6 +34,7 @@
 #include <net/netevent.h>
 #include <net/arp.h>
 #include <net/fib_rules.h>
+#include <net/fib_notifier.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <generated/utsrelease.h>
 
diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
new file mode 100644
index 000000000000..241475224f74
--- /dev/null
+++ b/include/net/fib_notifier.h
@@ -0,0 +1,44 @@
+#ifndef __NET_FIB_NOTIFIER_H
+#define __NET_FIB_NOTIFIER_H
+
+#include <linux/types.h>
+#include <linux/notifier.h>
+#include <net/net_namespace.h>
+
+struct fib_notifier_info {
+	struct net *net;
+	int family;
+};
+
+enum fib_event_type {
+	FIB_EVENT_ENTRY_REPLACE,
+	FIB_EVENT_ENTRY_APPEND,
+	FIB_EVENT_ENTRY_ADD,
+	FIB_EVENT_ENTRY_DEL,
+	FIB_EVENT_RULE_ADD,
+	FIB_EVENT_RULE_DEL,
+	FIB_EVENT_NH_ADD,
+	FIB_EVENT_NH_DEL,
+};
+
+struct fib_notifier_ops {
+	int family;
+	struct list_head list;
+	unsigned int (*fib_seq_read)(struct net *net);
+	int (*fib_dump)(struct net *net, struct notifier_block *nb);
+	struct rcu_head rcu;
+};
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+		      enum fib_event_type event_type,
+		      struct fib_notifier_info *info);
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+		       struct fib_notifier_info *info);
+int register_fib_notifier(struct notifier_block *nb,
+			  void (*cb)(struct notifier_block *nb));
+int unregister_fib_notifier(struct notifier_block *nb);
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net);
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops);
+
+#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index ef8992d49bc3..c0295c3ec5f3 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -19,6 +19,7 @@
 #include <net/flow.h>
 #include <linux/seq_file.h>
 #include <linux/rcupdate.h>
+#include <net/fib_notifier.h>
 #include <net/fib_rules.h>
 #include <net/inetpeer.h>
 #include <linux/percpu.h>
@@ -188,10 +189,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
 #define FIB_RES_PREFSRC(net, res)	((res).fi->fib_prefsrc ? : \
 					 FIB_RES_SADDR(net, res))
 
-struct fib_notifier_info {
-	struct net *net;
-};
-
 struct fib_entry_notifier_info {
 	struct fib_notifier_info info; /* must be first */
 	u32 dst;
@@ -212,25 +209,14 @@ struct fib_nh_notifier_info {
 	struct fib_nh *fib_nh;
 };
 
-enum fib_event_type {
-	FIB_EVENT_ENTRY_REPLACE,
-	FIB_EVENT_ENTRY_APPEND,
-	FIB_EVENT_ENTRY_ADD,
-	FIB_EVENT_ENTRY_DEL,
-	FIB_EVENT_RULE_ADD,
-	FIB_EVENT_RULE_DEL,
-	FIB_EVENT_NH_ADD,
-	FIB_EVENT_NH_DEL,
-};
-
-int register_fib_notifier(struct notifier_block *nb,
-			  void (*cb)(struct notifier_block *nb));
-int unregister_fib_notifier(struct notifier_block *nb);
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
-		      enum fib_event_type event_type,
-		      struct fib_notifier_info *info);
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
 		       struct fib_notifier_info *info);
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info);
+
+int __net_init fib4_notifier_init(struct net *net);
+void __net_exit fib4_notifier_exit(struct net *net);
 
 void fib_notify(struct net *net, struct notifier_block *nb);
 #ifdef CONFIG_IP_MULTIPLE_TABLES
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1c401bd4c2e0..57faa375eab9 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -88,6 +88,7 @@ struct net {
 	/* core fib_rules */
 	struct list_head	rules_ops;
 
+	struct list_head	fib_notifier_ops;  /* protected by net_mutex */
 
 	struct net_device       *loopback_dev;          /* The loopback */
 	struct netns_core	core;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9a14a0850b0e..20d061c805e3 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -159,6 +159,7 @@ struct netns_ipv4 {
 	int sysctl_fib_multipath_hash_policy;
 #endif
 
+	struct fib_notifier_ops	*notifier_ops;
 	unsigned int	fib_seq;	/* protected by rtnl_mutex */
 
 	atomic_t	rt_genid;
diff --git a/net/core/Makefile b/net/core/Makefile
index d501c4278015..56d771a887b6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,8 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
+			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
+			fib_notifier.o
 
 obj-y += net-sysfs.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
new file mode 100644
index 000000000000..292aab83702f
--- /dev/null
+++ b/net/core/fib_notifier.c
@@ -0,0 +1,164 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+		      enum fib_event_type event_type,
+		      struct fib_notifier_info *info)
+{
+	info->net = net;
+	return nb->notifier_call(nb, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifier);
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
+{
+	info->net = net;
+	return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifiers);
+
+static unsigned int fib_seq_sum(void)
+{
+	struct fib_notifier_ops *ops;
+	unsigned int fib_seq = 0;
+	struct net *net;
+
+	rtnl_lock();
+	for_each_net(net) {
+		list_for_each_entry(ops, &net->fib_notifier_ops, list)
+			fib_seq += ops->fib_seq_read(net);
+	}
+	rtnl_unlock();
+
+	return fib_seq;
+}
+
+static int fib_net_dump(struct net *net, struct notifier_block *nb)
+{
+	struct fib_notifier_ops *ops;
+
+	list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+		int err = ops->fib_dump(net, nb);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+				   void (*cb)(struct notifier_block *nb),
+				   unsigned int fib_seq)
+{
+	atomic_notifier_chain_register(&fib_chain, nb);
+	if (fib_seq == fib_seq_sum())
+		return true;
+	atomic_notifier_chain_unregister(&fib_chain, nb);
+	if (cb)
+		cb(nb);
+	return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+			  void (*cb)(struct notifier_block *nb))
+{
+	int retries = 0;
+	int err;
+
+	do {
+		unsigned int fib_seq = fib_seq_sum();
+		struct net *net;
+
+		rcu_read_lock();
+		for_each_net_rcu(net) {
+			err = fib_net_dump(net, nb);
+			if (err)
+				goto err_fib_net_dump;
+		}
+		rcu_read_unlock();
+
+		if (fib_dump_is_consistent(nb, cb, fib_seq))
+			return 0;
+	} while (++retries < FIB_DUMP_MAX_RETRIES);
+
+	return -EBUSY;
+
+err_fib_net_dump:
+	rcu_read_unlock();
+	return err;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
+
+static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
+				       struct net *net)
+{
+	struct fib_notifier_ops *o;
+
+	list_for_each_entry(o, &net->fib_notifier_ops, list)
+		if (ops->family == o->family)
+			return -EEXIST;
+	list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+	return 0;
+}
+
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
+{
+	struct fib_notifier_ops *ops;
+	int err;
+
+	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return ERR_PTR(-ENOMEM);
+
+	err = __fib_notifier_ops_register(ops, net);
+	if (err)
+		goto err_register;
+
+	return ops;
+
+err_register:
+	kfree(ops);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(fib_notifier_ops_register);
+
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
+{
+	list_del_rcu(&ops->list);
+	kfree_rcu(ops, rcu);
+}
+EXPORT_SYMBOL(fib_notifier_ops_unregister);
+
+static int __net_init fib_notifier_net_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->fib_notifier_ops);
+	return 0;
+}
+
+static struct pernet_operations fib_notifier_net_ops = {
+	.init = fib_notifier_net_init,
+};
+
+static int __init fib_notifier_init(void)
+{
+	return register_pernet_subsys(&fib_notifier_net_ops);
+}
+
+subsys_initcall(fib_notifier_init);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 044d2a159a3c..2cba559f14df 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1247,22 +1247,28 @@ static int __net_init ip_fib_net_init(struct net *net)
 	int err;
 	size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
 
-	net->ipv4.fib_seq = 0;
+	err = fib4_notifier_init(net);
+	if (err)
+		return err;
 
 	/* Avoid false sharing : Use at least a full cache line */
 	size = max_t(size_t, size, L1_CACHE_BYTES);
 
 	net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
-	if (!net->ipv4.fib_table_hash)
-		return -ENOMEM;
+	if (!net->ipv4.fib_table_hash) {
+		err = -ENOMEM;
+		goto err_table_hash_alloc;
+	}
 
 	err = fib4_rules_init(net);
 	if (err < 0)
-		goto fail;
+		goto err_rules_init;
 	return 0;
 
-fail:
+err_rules_init:
 	kfree(net->ipv4.fib_table_hash);
+err_table_hash_alloc:
+	fib4_notifier_exit(net);
 	return err;
 }
 
@@ -1292,6 +1298,7 @@ static void ip_fib_net_exit(struct net *net)
 #endif
 	rtnl_unlock();
 	kfree(net->ipv4.fib_table_hash);
+	fib4_notifier_exit(net);
 }
 
 static int __net_init fib_net_init(struct net *net)
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index e0714d975947..7cf1954bbadc 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -1,86 +1,66 @@
 #include <linux/rtnetlink.h>
 #include <linux/notifier.h>
-#include <linux/rcupdate.h>
+#include <linux/socket.h>
 #include <linux/kernel.h>
 #include <net/net_namespace.h>
+#include <net/fib_notifier.h>
 #include <net/netns/ipv4.h>
 #include <net/ip_fib.h>
 
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
-		      enum fib_event_type event_type,
-		      struct fib_notifier_info *info)
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
 {
-	info->net = net;
-	return nb->notifier_call(nb, event_type, info);
+	info->family = AF_INET;
+	return call_fib_notifier(nb, net, event_type, info);
 }
 
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
-		       struct fib_notifier_info *info)
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info)
 {
+	ASSERT_RTNL();
+
+	info->family = AF_INET;
 	net->ipv4.fib_seq++;
-	info->net = net;
-	return atomic_notifier_call_chain(&fib_chain, event_type, info);
+	return call_fib_notifiers(net, event_type, info);
 }
 
-static unsigned int fib_seq_sum(void)
+static unsigned int fib4_seq_read(struct net *net)
 {
-	unsigned int fib_seq = 0;
-	struct net *net;
+	ASSERT_RTNL();
 
-	rtnl_lock();
-	for_each_net(net)
-		fib_seq += net->ipv4.fib_seq;
-	rtnl_unlock();
-
-	return fib_seq;
+	return net->ipv4.fib_seq;
 }
 
-static bool fib_dump_is_consistent(struct notifier_block *nb,
-				   void (*cb)(struct notifier_block *nb),
-				   unsigned int fib_seq)
+static int fib4_dump(struct net *net, struct notifier_block *nb)
 {
-	atomic_notifier_chain_register(&fib_chain, nb);
-	if (fib_seq == fib_seq_sum())
-		return true;
-	atomic_notifier_chain_unregister(&fib_chain, nb);
-	if (cb)
-		cb(nb);
-	return false;
+	fib_rules_notify(net, nb);
+	fib_notify(net, nb);
+
+	return 0;
 }
 
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
-			  void (*cb)(struct notifier_block *nb))
-{
-	int retries = 0;
+static const struct fib_notifier_ops fib4_notifier_ops_template = {
+	.family		= AF_INET,
+	.fib_seq_read	= fib4_seq_read,
+	.fib_dump	= fib4_dump,
+};
 
-	do {
-		unsigned int fib_seq = fib_seq_sum();
-		struct net *net;
+int __net_init fib4_notifier_init(struct net *net)
+{
+	struct fib_notifier_ops *ops;
 
-		/* Mutex semantics guarantee that every change done to
-		 * FIB tries before we read the change sequence counter
-		 * is now visible to us.
-		 */
-		rcu_read_lock();
-		for_each_net_rcu(net) {
-			fib_rules_notify(net, nb);
-			fib_notify(net, nb);
-		}
-		rcu_read_unlock();
+	net->ipv4.fib_seq = 0;
 
-		if (fib_dump_is_consistent(nb, cb, fib_seq))
-			return 0;
-	} while (++retries < FIB_DUMP_MAX_RETRIES);
+	ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	net->ipv4.notifier_ops = ops;
 
-	return -EBUSY;
+	return 0;
 }
-EXPORT_SYMBOL(register_fib_notifier);
 
-int unregister_fib_notifier(struct notifier_block *nb)
+void __net_exit fib4_notifier_exit(struct net *net)
 {
-	return atomic_notifier_chain_unregister(&fib_chain, nb);
+	fib_notifier_ops_unregister(net->ipv4.notifier_ops);
 }
-EXPORT_SYMBOL(unregister_fib_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 778ecf977eb2..acdbf5a24ac9 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -32,6 +32,7 @@
 #include <net/tcp.h>
 #include <net/ip_fib.h>
 #include <net/fib_rules.h>
+#include <net/fib_notifier.h>
 
 struct fib4_rule {
 	struct fib_rule		common;
@@ -193,7 +194,7 @@ static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
 		.rule = rule,
 	};
 
-	return call_fib_notifier(nb, net, event_type, &info.info);
+	return call_fib4_notifier(nb, net, event_type, &info.info);
 }
 
 static int call_fib_rule_notifiers(struct net *net,
@@ -204,7 +205,7 @@ static int call_fib_rule_notifiers(struct net *net,
 		.rule = rule,
 	};
 
-	return call_fib_notifiers(net, event_type, &info.info);
+	return call_fib4_notifiers(net, event_type, &info.info);
 }
 
 /* Called with rcu_read_lock() */
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f62dc2463280..632b454ce77c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -44,6 +44,7 @@
 #include <net/netlink.h>
 #include <net/nexthop.h>
 #include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
 
 #include "fib_lookup.h"
 
@@ -1451,14 +1452,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
 		if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
 		    fib_nh->nh_flags & RTNH_F_LINKDOWN)
 			break;
-		return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
-					  &info.info);
+		return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
+					   &info.info);
 	case FIB_EVENT_NH_DEL:
 		if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
 		     fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
 		    (fib_nh->nh_flags & RTNH_F_DEAD))
-			return call_fib_notifiers(dev_net(fib_nh->nh_dev),
-						  event_type, &info.info);
+			return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
+						   event_type, &info.info);
 	default:
 		break;
 	}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 64668c69dda6..1a6ffb0dab9c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -81,6 +81,7 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/fib_notifier.h>
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
@@ -97,7 +98,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
 		.type = type,
 		.tb_id = tb_id,
 	};
-	return call_fib_notifier(nb, net, event_type, &info.info);
+	return call_fib4_notifier(nb, net, event_type, &info.info);
 }
 
 static int call_fib_entry_notifiers(struct net *net,
@@ -113,7 +114,7 @@ static int call_fib_entry_notifiers(struct net *net,
 		.type = type,
 		.tb_id = tb_id,
 	};
-	return call_fib_notifiers(net, event_type, &info.info);
+	return call_fib4_notifiers(net, event_type, &info.info);
 }
 
 #define MAX_STAT_DEPTH 32
-- 
cgit v1.2.3


From 1b2a4440858857f2f93bb2ec5bb3a60f4fcc25be Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 3 Aug 2017 13:28:14 +0200
Subject: net: fib_rules: Implement notification logic in core

Unlike the routing tables, the FIB rules share a common core, so instead
of replicating the same logic for each address family we can simply dump
the rules and send notifications from the core itself.

To protect the integrity of the dump, a rules-specific sequence counter
is added for each address family and incremented whenever a rule is
added or deleted (under RTNL).

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h |  9 +++++++
 include/net/ip_fib.h    | 24 +++++++++----------
 net/core/fib_rules.c    | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/fib_notifier.c |  9 +++++--
 net/ipv4/fib_rules.c    | 45 ++++++++---------------------------
 5 files changed, 101 insertions(+), 49 deletions(-)

(limited to 'include/net')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index c487bfa2f479..3d7f1cefc6f5 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -8,6 +8,7 @@
 #include <linux/refcount.h>
 #include <net/flow.h>
 #include <net/rtnetlink.h>
+#include <net/fib_notifier.h>
 
 struct fib_kuid_range {
 	kuid_t start;
@@ -57,6 +58,7 @@ struct fib_rules_ops {
 	int			addr_size;
 	int			unresolved_rules;
 	int			nr_goto_rules;
+	unsigned int		fib_rules_seq;
 
 	int			(*action)(struct fib_rule *,
 					  struct flowi *, int,
@@ -89,6 +91,11 @@ struct fib_rules_ops {
 	struct rcu_head		rcu;
 };
 
+struct fib_rule_notifier_info {
+	struct fib_notifier_info info; /* must be first */
+	struct fib_rule *rule;
+};
+
 #define FRA_GENERIC_POLICY \
 	[FRA_IIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
 	[FRA_OIFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
@@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
 int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
 			 u32 flags);
 bool fib_rule_matchall(const struct fib_rule *rule);
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family);
+unsigned int fib_rules_seq_read(struct net *net, int family);
 
 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		   struct netlink_ext_ack *extack);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index c0295c3ec5f3..1a7f7e424320 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -199,11 +199,6 @@ struct fib_entry_notifier_info {
 	u32 tb_id;
 };
 
-struct fib_rule_notifier_info {
-	struct fib_notifier_info info; /* must be first */
-	struct fib_rule *rule;
-};
-
 struct fib_nh_notifier_info {
 	struct fib_notifier_info info; /* must be first */
 	struct fib_nh *fib_nh;
@@ -219,13 +214,6 @@ int __net_init fib4_notifier_init(struct net *net);
 void __net_exit fib4_notifier_exit(struct net *net);
 
 void fib_notify(struct net *net, struct notifier_block *nb);
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-void fib_rules_notify(struct net *net, struct notifier_block *nb);
-#else
-static inline void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
-}
-#endif
 
 struct fib_table {
 	struct hlist_node	tb_hlist;
@@ -298,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule)
 	return true;
 }
 
+static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline unsigned int fib4_rules_seq_read(struct net *net)
+{
+	return 0;
+}
+
 #else /* CONFIG_IP_MULTIPLE_TABLES */
 int __net_init fib4_rules_init(struct net *net);
 void __net_exit fib4_rules_exit(struct net *net);
@@ -343,6 +341,8 @@ out:
 }
 
 bool fib4_rule_default(const struct fib_rule *rule);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib4_rules_seq_read(struct net *net);
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fdcb1bcd2afa..fc0b65093417 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,6 +299,67 @@ out:
 }
 EXPORT_SYMBOL_GPL(fib_rules_lookup);
 
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+				  enum fib_event_type event_type,
+				  struct fib_rule *rule, int family)
+{
+	struct fib_rule_notifier_info info = {
+		.info.family = family,
+		.rule = rule,
+	};
+
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib_rule_notifiers(struct net *net,
+				   enum fib_event_type event_type,
+				   struct fib_rule *rule,
+				   struct fib_rules_ops *ops)
+{
+	struct fib_rule_notifier_info info = {
+		.info.family = ops->family,
+		.rule = rule,
+	};
+
+	ops->fib_rules_seq++;
+	return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+{
+	struct fib_rules_ops *ops;
+	struct fib_rule *rule;
+
+	ops = lookup_rules_ops(net, family);
+	if (!ops)
+		return -EAFNOSUPPORT;
+	list_for_each_entry_rcu(rule, &ops->rules_list, list)
+		call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
+				       family);
+	rules_ops_put(ops);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+unsigned int fib_rules_seq_read(struct net *net, int family)
+{
+	unsigned int fib_rules_seq;
+	struct fib_rules_ops *ops;
+
+	ASSERT_RTNL();
+
+	ops = lookup_rules_ops(net, family);
+	if (!ops)
+		return 0;
+	fib_rules_seq = ops->fib_rules_seq;
+	rules_ops_put(ops);
+
+	return fib_rules_seq;
+}
+EXPORT_SYMBOL_GPL(fib_rules_seq_read);
+
 static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
 			    struct fib_rules_ops *ops)
 {
@@ -548,6 +609,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (rule->tun_id)
 		ip_tunnel_need_metadata();
 
+	call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops);
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);
@@ -687,6 +749,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 			}
 		}
 
+		call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops);
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).portid);
 		fib_rule_put(rule);
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index 7cf1954bbadc..5d7afb145562 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -29,12 +29,17 @@ static unsigned int fib4_seq_read(struct net *net)
 {
 	ASSERT_RTNL();
 
-	return net->ipv4.fib_seq;
+	return net->ipv4.fib_seq + fib4_rules_seq_read(net);
 }
 
 static int fib4_dump(struct net *net, struct notifier_block *nb)
 {
-	fib_rules_notify(net, nb);
+	int err;
+
+	err = fib4_rules_dump(net, nb);
+	if (err)
+		return err;
+
 	fib_notify(net, nb);
 
 	return 0;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index acdbf5a24ac9..35d646a62ad4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -32,7 +32,6 @@
 #include <net/tcp.h>
 #include <net/ip_fib.h>
 #include <net/fib_rules.h>
-#include <net/fib_notifier.h>
 
 struct fib4_rule {
 	struct fib_rule		common;
@@ -69,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule)
 }
 EXPORT_SYMBOL_GPL(fib4_rule_default);
 
+int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return fib_rules_dump(net, nb, AF_INET);
+}
+
+unsigned int fib4_rules_seq_read(struct net *net)
+{
+	return fib_rules_seq_read(net, AF_INET);
+}
+
 int __fib_lookup(struct net *net, struct flowi4 *flp,
 		 struct fib_result *res, unsigned int flags)
 {
@@ -186,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net)
 	return NULL;
 }
 
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
-				  enum fib_event_type event_type,
-				  struct fib_rule *rule)
-{
-	struct fib_rule_notifier_info info = {
-		.rule = rule,
-	};
-
-	return call_fib4_notifier(nb, net, event_type, &info.info);
-}
-
-static int call_fib_rule_notifiers(struct net *net,
-				   enum fib_event_type event_type,
-				   struct fib_rule *rule)
-{
-	struct fib_rule_notifier_info info = {
-		.rule = rule,
-	};
-
-	return call_fib4_notifiers(net, event_type, &info.info);
-}
-
-/* Called with rcu_read_lock() */
-void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
-	struct fib_rules_ops *ops = net->ipv4.rules_ops;
-	struct fib_rule *rule;
-
-	list_for_each_entry_rcu(rule, &ops->rules_list, list)
-		call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
-}
-
 static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
 	FRA_GENERIC_POLICY,
 	[FRA_FLOW]	= { .type = NLA_U32 },
@@ -274,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule4->tos = frh->tos;
 
 	net->ipv4.fib_has_custom_rules = true;
-	call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
 
 	err = 0;
 errout:
@@ -296,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule)
 		net->ipv4.fib_num_tclassid_users--;
 #endif
 	net->ipv4.fib_has_custom_rules = true;
-	call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
 errout:
 	return err;
 }
-- 
cgit v1.2.3


From e3ea973159d53559c5ae9a9dbc824da9aba6cac0 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 3 Aug 2017 13:28:15 +0200
Subject: ipv6: fib_rules: Check if rule is a default rule

As explained in commit 3c71006d15fd ("ipv4: fib_rules: Check if rule is
a default rule"), drivers supporting IPv6 FIB offload need to be able to
sanitize the rules they don't support and potentially flush their
tables.

Add an IPv6 helper to check if a FIB rule is a default rule.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h |  5 +++++
 net/ipv6/fib6_rules.c | 20 ++++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1a88008cc6f5..6000b0dc51ee 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -295,6 +295,7 @@ int ipv6_route_open(struct inode *inode, struct file *file);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
+bool fib6_rule_default(const struct fib_rule *rule);
 #else
 static inline int               fib6_rules_init(void)
 {
@@ -304,5 +305,9 @@ static inline void              fib6_rules_cleanup(void)
 {
 	return ;
 }
+static inline bool fib6_rule_default(const struct fib_rule *rule)
+{
+	return true;
+}
 #endif
 #endif
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec849d88a662..ef1fcee6bf16 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -29,6 +29,26 @@ struct fib6_rule {
 	u8			tclass;
 };
 
+static bool fib6_rule_matchall(const struct fib_rule *rule)
+{
+	struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
+
+	if (r->dst.plen || r->src.plen || r->tclass)
+		return false;
+	return fib_rule_matchall(rule);
+}
+
+bool fib6_rule_default(const struct fib_rule *rule)
+{
+	if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+	    rule->l3mdev)
+		return false;
+	if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
+		return false;
+	return true;
+}
+EXPORT_SYMBOL_GPL(fib6_rule_default);
+
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
-- 
cgit v1.2.3


From 16ab6d7d4d8cc037bb4be12c2b849ac92787e1ff Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 3 Aug 2017 13:28:16 +0200
Subject: ipv6: fib: Add FIB notifiers callbacks

We're about to add IPv6 FIB offload support, so implement the necessary
callbacks in IPv6 code, which will later allow us to add routes and
rules notifications.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h    | 11 ++++++++++
 include/net/netns/ipv6.h |  1 +
 net/ipv6/Makefile        |  2 +-
 net/ipv6/fib6_notifier.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_fib.c       |  7 ++++++
 5 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 net/ipv6/fib6_notifier.c

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 6000b0dc51ee..be8ddf3253dc 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -16,10 +16,12 @@
 #include <linux/ipv6_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/spinlock.h>
+#include <linux/notifier.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/netlink.h>
 #include <net/inetpeer.h>
+#include <net/fib_notifier.h>
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 #define FIB6_TABLE_HASHSZ 256
@@ -292,6 +294,15 @@ int fib6_init(void);
 
 int ipv6_route_open(struct inode *inode, struct file *file);
 
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
+		       struct fib_notifier_info *info);
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info);
+
+int __net_init fib6_notifier_init(struct net *net);
+void __net_exit fib6_notifier_exit(struct net *net);
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index de7745e2edcc..abdf3b40303b 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -86,6 +86,7 @@ struct netns_ipv6 {
 	atomic_t		dev_addr_genid;
 	atomic_t		fib6_sernum;
 	struct seg6_pernet_data *seg6_data;
+	struct fib_notifier_ops	*notifier_ops;
 };
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 217e9ff0e24b..f8b24c2e0d77 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
 		exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
-		udp_offload.o seg6.o
+		udp_offload.o seg6.o fib6_notifier.o
 
 ipv6-offload :=	ip6_offload.o tcpv6_offload.o exthdrs_offload.o
 
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
new file mode 100644
index 000000000000..c2bb1ab5b5eb
--- /dev/null
+++ b/net/ipv6/fib6_notifier.c
@@ -0,0 +1,55 @@
+#include <linux/notifier.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+#include <net/netns/ipv6.h>
+#include <net/ip6_fib.h>
+
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
+{
+	info->family = AF_INET6;
+	return call_fib_notifier(nb, net, event_type, info);
+}
+
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info)
+{
+	info->family = AF_INET6;
+	return call_fib_notifiers(net, event_type, info);
+}
+
+static unsigned int fib6_seq_read(struct net *net)
+{
+	return 0;
+}
+
+static int fib6_dump(struct net *net, struct notifier_block *nb)
+{
+	return 0;
+}
+
+static const struct fib_notifier_ops fib6_notifier_ops_template = {
+	.family		= AF_INET6,
+	.fib_seq_read	= fib6_seq_read,
+	.fib_dump	= fib6_dump,
+};
+
+int __net_init fib6_notifier_init(struct net *net)
+{
+	struct fib_notifier_ops *ops;
+
+	ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	net->ipv6.notifier_ops = ops;
+
+	return 0;
+}
+
+void __net_exit fib6_notifier_exit(struct net *net)
+{
+	fib_notifier_ops_unregister(net->ipv6.notifier_ops);
+}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ebb299cf72b7..f93976e3f65c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1839,6 +1839,11 @@ static void fib6_gc_timer_cb(unsigned long arg)
 static int __net_init fib6_net_init(struct net *net)
 {
 	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+	int err;
+
+	err = fib6_notifier_init(net);
+	if (err)
+		return err;
 
 	spin_lock_init(&net->ipv6.fib6_gc_lock);
 	rwlock_init(&net->ipv6.fib6_walker_lock);
@@ -1891,6 +1896,7 @@ out_fib_table_hash:
 out_rt6_stats:
 	kfree(net->ipv6.rt6_stats);
 out_timer:
+	fib6_notifier_exit(net);
 	return -ENOMEM;
 }
 
@@ -1907,6 +1913,7 @@ static void fib6_net_exit(struct net *net)
 	kfree(net->ipv6.fib6_main_tbl);
 	kfree(net->ipv6.fib_table_hash);
 	kfree(net->ipv6.rt6_stats);
+	fib6_notifier_exit(net);
 }
 
 static struct pernet_operations fib6_net_ops = {
-- 
cgit v1.2.3


From df77fe4d9865c6354372876632bcbceeda84f6c8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 3 Aug 2017 13:28:17 +0200
Subject: ipv6: fib: Add in-kernel notifications for route add / delete

As with IPv4, allow listeners of the FIB notification chain to receive
notifications whenever a route is added, replaced or deleted. This is
done by placing calls to the FIB notification chain in the two lowest
level functions that end up performing these operations - namely,
fib6_add_rt2node() and fib6_del_route().

Unlike IPv4, APPEND notifications aren't sent as the kernel doesn't
distinguish between "append" (NLM_F_CREATE|NLM_F_APPEND) and "prepend"
(NLM_F_CREATE). If NLM_F_EXCL isn't set, duplicate routes are always
added after the existing duplicate routes.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h |  5 +++++
 net/ipv6/ip6_fib.c    | 17 +++++++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index be8ddf3253dc..e2b292b79e99 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -258,6 +258,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
 					 struct fib6_table *,
 					 struct flowi6 *, int);
 
+struct fib6_entry_notifier_info {
+	struct fib_notifier_info info; /* must be first */
+	struct rt6_info *rt;
+};
+
 /*
  *	exported functions
  */
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f93976e3f65c..595a57cbbc7b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -33,6 +33,7 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 #include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
 
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
@@ -302,6 +303,17 @@ static void __net_init fib6_tables_init(struct net *net)
 
 #endif
 
+static int call_fib6_entry_notifiers(struct net *net,
+				     enum fib_event_type event_type,
+				     struct rt6_info *rt)
+{
+	struct fib6_entry_notifier_info info = {
+		.rt = rt,
+	};
+
+	return call_fib6_notifiers(net, event_type, &info.info);
+}
+
 static int fib6_dump_node(struct fib6_walker *w)
 {
 	int res;
@@ -879,6 +891,8 @@ add:
 		*ins = rt;
 		rt->rt6i_node = fn;
 		atomic_inc(&rt->rt6i_ref);
+		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
+					  rt);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
 		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -906,6 +920,8 @@ add:
 		rt->rt6i_node = fn;
 		rt->dst.rt6_next = iter->dst.rt6_next;
 		atomic_inc(&rt->rt6i_ref);
+		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
+					  rt);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
 		if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -1459,6 +1475,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 
 	fib6_purge_rt(rt, fn, net);
 
+	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
 	if (!info->skip_notify)
 		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
 	rt6_release(rt);
-- 
cgit v1.2.3


From dcb18f762f6ac83a6dc9cdc26dd694dcc167beb7 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 3 Aug 2017 13:28:18 +0200
Subject: ipv6: fib_rules: Dump rules during registration to FIB chain

Allow users of the FIB notification chain to receive a complete view of
the IPv6 FIB rules upon registration to the chain.

The integrity of the dump is ensured by a per-family sequence counter
that is incremented (under RTNL) whenever a rule is added or deleted.

All the sequence counters are read (under RTNL) and summed, prior and
after the dump. In case the counters differ, then the dump is either
restarted or the registration fails.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h    | 10 ++++++++++
 net/ipv6/fib6_notifier.c |  4 ++--
 net/ipv6/fib6_rules.c    | 11 +++++++++++
 3 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index e2b292b79e99..dbe5537809f5 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -312,6 +312,8 @@ void __net_exit fib6_notifier_exit(struct net *net);
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
 bool fib6_rule_default(const struct fib_rule *rule);
+int fib6_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib6_rules_seq_read(struct net *net);
 #else
 static inline int               fib6_rules_init(void)
 {
@@ -325,5 +327,13 @@ static inline bool fib6_rule_default(const struct fib_rule *rule)
 {
 	return true;
 }
+static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return 0;
+}
+static inline unsigned int fib6_rules_seq_read(struct net *net)
+{
+	return 0;
+}
 #endif
 #endif
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index c2bb1ab5b5eb..298efc678f3b 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -23,12 +23,12 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
 
 static unsigned int fib6_seq_read(struct net *net)
 {
-	return 0;
+	return fib6_rules_seq_read(net);
 }
 
 static int fib6_dump(struct net *net, struct notifier_block *nb)
 {
-	return 0;
+	return fib6_rules_dump(net, nb);
 }
 
 static const struct fib_notifier_ops fib6_notifier_ops_template = {
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ef1fcee6bf16..2f29e4e33bd3 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/netdevice.h>
+#include <linux/notifier.h>
 #include <linux/export.h>
 
 #include <net/fib_rules.h>
@@ -49,6 +50,16 @@ bool fib6_rule_default(const struct fib_rule *rule)
 }
 EXPORT_SYMBOL_GPL(fib6_rule_default);
 
+int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return fib_rules_dump(net, nb, AF_INET6);
+}
+
+unsigned int fib6_rules_seq_read(struct net *net)
+{
+	return fib_rules_seq_read(net, AF_INET6);
+}
+
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
-- 
cgit v1.2.3


From e1ee0a5ba35d999caef94d659b4cb842e63aeb68 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 3 Aug 2017 13:28:19 +0200
Subject: ipv6: fib: Dump tables during registration to FIB chain

Dump all the FIB tables in each net namespace upon registration to the
FIB notification chain so that the callee will have a complete view of
the tables.

The integrity of the dump is ensured by a per-table sequence counter
that is incremented (under write lock) whenever a route is added or
deleted from the table.

All the sequence counters are read (under each table's read lock) and
summed, prior and after the dump. In case the counters differ, then the
dump is either restarted or the registration fails.

While it's possible for a table to be modified after its counter has
been read, this isn't really a problem. In case it happened before it
was read the second time, then the comparison at the end will fail. If
it happened afterwards, then we're guaranteed to be notified about the
change, as the notification block is registered prior to the second
read.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h    |  4 +++
 net/ipv6/fib6_notifier.c | 10 ++++--
 net/ipv6/ip6_fib.c       | 92 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index dbe5537809f5..0b3052157e6b 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -235,6 +235,7 @@ struct fib6_table {
 	struct fib6_node	tb6_root;
 	struct inet_peer_base	tb6_peers;
 	unsigned int		flags;
+	unsigned int		fib_seq;
 #define RT6_TABLE_HAS_DFLT_ROUTER	BIT(0)
 };
 
@@ -308,6 +309,9 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
 int __net_init fib6_notifier_init(struct net *net);
 void __net_exit fib6_notifier_exit(struct net *net);
 
+unsigned int fib6_tables_seq_read(struct net *net);
+int fib6_tables_dump(struct net *net, struct notifier_block *nb);
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 int fib6_rules_init(void);
 void fib6_rules_cleanup(void);
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index 298efc678f3b..66a103ef7e86 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -23,12 +23,18 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
 
 static unsigned int fib6_seq_read(struct net *net)
 {
-	return fib6_rules_seq_read(net);
+	return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
 }
 
 static int fib6_dump(struct net *net, struct notifier_block *nb)
 {
-	return fib6_rules_dump(net, nb);
+	int err;
+
+	err = fib6_rules_dump(net, nb);
+	if (err)
+		return err;
+
+	return fib6_tables_dump(net, nb);
 }
 
 static const struct fib_notifier_ops fib6_notifier_ops_template = {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 595a57cbbc7b..719c10480c74 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -303,6 +303,37 @@ static void __net_init fib6_tables_init(struct net *net)
 
 #endif
 
+unsigned int fib6_tables_seq_read(struct net *net)
+{
+	unsigned int h, fib_seq = 0;
+
+	rcu_read_lock();
+	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+		struct fib6_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+			read_lock_bh(&tb->tb6_lock);
+			fib_seq += tb->fib_seq;
+			read_unlock_bh(&tb->tb6_lock);
+		}
+	}
+	rcu_read_unlock();
+
+	return fib_seq;
+}
+
+static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+				    enum fib_event_type event_type,
+				    struct rt6_info *rt)
+{
+	struct fib6_entry_notifier_info info = {
+		.rt = rt,
+	};
+
+	return call_fib6_notifier(nb, net, event_type, &info.info);
+}
+
 static int call_fib6_entry_notifiers(struct net *net,
 				     enum fib_event_type event_type,
 				     struct rt6_info *rt)
@@ -311,9 +342,70 @@ static int call_fib6_entry_notifiers(struct net *net,
 		.rt = rt,
 	};
 
+	rt->rt6i_table->fib_seq++;
 	return call_fib6_notifiers(net, event_type, &info.info);
 }
 
+struct fib6_dump_arg {
+	struct net *net;
+	struct notifier_block *nb;
+};
+
+static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+{
+	if (rt == arg->net->ipv6.ip6_null_entry)
+		return;
+	call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+}
+
+static int fib6_node_dump(struct fib6_walker *w)
+{
+	struct rt6_info *rt;
+
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+		fib6_rt_dump(rt, w->args);
+	w->leaf = NULL;
+	return 0;
+}
+
+static void fib6_table_dump(struct net *net, struct fib6_table *tb,
+			    struct fib6_walker *w)
+{
+	w->root = &tb->tb6_root;
+	read_lock_bh(&tb->tb6_lock);
+	fib6_walk(net, w);
+	read_unlock_bh(&tb->tb6_lock);
+}
+
+/* Called with rcu_read_lock() */
+int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+{
+	struct fib6_dump_arg arg;
+	struct fib6_walker *w;
+	unsigned int h;
+
+	w = kzalloc(sizeof(*w), GFP_ATOMIC);
+	if (!w)
+		return -ENOMEM;
+
+	w->func = fib6_node_dump;
+	arg.net = net;
+	arg.nb = nb;
+	w->args = &arg;
+
+	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+		struct fib6_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+			fib6_table_dump(net, tb, w);
+	}
+
+	kfree(w);
+
+	return 0;
+}
+
 static int fib6_dump_node(struct fib6_walker *w)
 {
 	int res;
-- 
cgit v1.2.3


From a460aa83963b185a32a6377eb486b6e613ac8e38 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 3 Aug 2017 13:28:25 +0200
Subject: ipv6: fib: Add helpers to hold / drop a reference on rt6_info

Similar to commit 1c677b3d2828 ("ipv4: fib: Add fib_info_hold() helper")
and commit b423cb10807b ("ipv4: fib: Export free_fib_info()") add an
helper to hold a reference on rt6_info and export rt6_release() to drop
it and potentially release the route.

This is needed so that drivers capable of FIB offload could hold a
reference on the route before queueing it for offload and drop it after
the route has been programmed to the device's tables.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 16 ++++++++++++++++
 net/ipv6/ip6_fib.c    | 12 ++----------
 2 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 0b3052157e6b..1d790ea40ea7 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -187,6 +187,22 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 	dst_release(&rt->dst);
 }
 
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
+
+static inline void rt6_hold(struct rt6_info *rt)
+{
+	atomic_inc(&rt->rt6i_ref);
+}
+
+static inline void rt6_release(struct rt6_info *rt)
+{
+	if (atomic_dec_and_test(&rt->rt6i_ref)) {
+		rt6_free_pcpu(rt);
+		dst_dev_put(&rt->dst);
+		dst_release(&rt->dst);
+	}
+}
+
 enum fib6_walk_state {
 #ifdef CONFIG_IPV6_SUBTREES
 	FWS_S,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index fe193b48ef61..69ed0043d117 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -154,7 +154,7 @@ static void node_free(struct fib6_node *fn)
 	kmem_cache_free(fib6_node_kmem, fn);
 }
 
-static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
 {
 	int cpu;
 
@@ -177,15 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
 	free_percpu(non_pcpu_rt->rt6i_pcpu);
 	non_pcpu_rt->rt6i_pcpu = NULL;
 }
-
-static void rt6_release(struct rt6_info *rt)
-{
-	if (atomic_dec_and_test(&rt->rt6i_ref)) {
-		rt6_free_pcpu(rt);
-		dst_dev_put(&rt->dst);
-		dst_release(&rt->dst);
-	}
-}
+EXPORT_SYMBOL_GPL(rt6_free_pcpu);
 
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
 {
-- 
cgit v1.2.3


From 98ba0bd5505dcbb90322a4be07bcfe6b8a18c73f Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 3 Aug 2017 16:29:37 -0400
Subject: sock: allocate skbs from optmem

Add sock_omalloc and sock_ofree to be able to allocate control skbs,
for instance for looping errors onto sk_error_queue.

The transmit budget (sk_wmem_alloc) is involved in transmit skb
shaping, most notably in TCP Small Queues. Using this budget for
control packets would impact transmission.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  2 ++
 net/core/sock.c    | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 393c38e9f6aa..0f778d3c4300 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1531,6 +1531,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority);
 void __sock_wfree(struct sk_buff *skb);
 void sock_wfree(struct sk_buff *skb);
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+			     gfp_t priority);
 void skb_orphan_partial(struct sk_buff *skb);
 void sock_rfree(struct sk_buff *skb);
 void sock_efree(struct sk_buff *skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 742f68c9c84a..1261880bdcc8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1923,6 +1923,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 }
 EXPORT_SYMBOL(sock_wmalloc);
 
+static void sock_ofree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_omem_alloc);
+}
+
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+			     gfp_t priority)
+{
+	struct sk_buff *skb;
+
+	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
+	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
+	    sysctl_optmem_max)
+		return NULL;
+
+	skb = alloc_skb(size, priority);
+	if (!skb)
+		return NULL;
+
+	atomic_add(skb->truesize, &sk->sk_omem_alloc);
+	skb->sk = sk;
+	skb->destructor = sock_ofree;
+	return skb;
+}
+
 /*
  * Allocate a memory block from the socket's option memory buffer.
  */
-- 
cgit v1.2.3


From 52267790ef52d7513879238ca9fac22c1733e0e3 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 3 Aug 2017 16:29:39 -0400
Subject: sock: add MSG_ZEROCOPY

The kernel supports zerocopy sendmsg in virtio and tap. Expand the
infrastructure to support other socket types. Introduce a completion
notification channel over the socket error queue. Notifications are
returned with ee_origin SO_EE_ORIGIN_ZEROCOPY. ee_errno is 0 to avoid
blocking the send/recv path on receiving notifications.

Add reference counting, to support the skb split, merge, resize and
clone operations possible with SOCK_STREAM and other socket types.

The patch does not yet modify any datapaths.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h        |  60 +++++++++++++++++++
 include/linux/socket.h        |   1 +
 include/net/sock.h            |   2 +
 include/uapi/linux/errqueue.h |   3 +
 net/core/datagram.c           |  55 ++++++++++-------
 net/core/skbuff.c             | 133 ++++++++++++++++++++++++++++++++++++++++++
 net/core/sock.c               |   2 +
 7 files changed, 235 insertions(+), 21 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2f64e2bbb592..59cff7aa494e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -429,6 +429,7 @@ enum {
 	SKBTX_SCHED_TSTAMP = 1 << 6,
 };
 
+#define SKBTX_ZEROCOPY_FRAG	(SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG)
 #define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP    | \
 				 SKBTX_SCHED_TSTAMP)
 #define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
@@ -445,8 +446,28 @@ struct ubuf_info {
 	void (*callback)(struct ubuf_info *, bool zerocopy_success);
 	void *ctx;
 	unsigned long desc;
+	u16 zerocopy:1;
+	atomic_t refcnt;
 };
 
+#define skb_uarg(SKB)	((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
+
+static inline void sock_zerocopy_get(struct ubuf_info *uarg)
+{
+	atomic_inc(&uarg->refcnt);
+}
+
+void sock_zerocopy_put(struct ubuf_info *uarg);
+void sock_zerocopy_put_abort(struct ubuf_info *uarg);
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+			     struct msghdr *msg, int len,
+			     struct ubuf_info *uarg);
+
 /* This data is invariant across clones and lives at
  * the end of the header data, ie. at skb->end.
  */
@@ -1214,6 +1235,45 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
 	return &skb_shinfo(skb)->hwtstamps;
 }
 
+static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
+{
+	bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY;
+
+	return is_zcopy ? skb_uarg(skb) : NULL;
+}
+
+static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
+{
+	if (skb && uarg && !skb_zcopy(skb)) {
+		sock_zerocopy_get(uarg);
+		skb_shinfo(skb)->destructor_arg = uarg;
+		skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+	}
+}
+
+/* Release a reference on a zerocopy structure */
+static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
+{
+	struct ubuf_info *uarg = skb_zcopy(skb);
+
+	if (uarg) {
+		uarg->zerocopy = uarg->zerocopy && zerocopy;
+		sock_zerocopy_put(uarg);
+		skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+	}
+}
+
+/* Abort a zerocopy operation and revert zckey on error in send syscall */
+static inline void skb_zcopy_abort(struct sk_buff *skb)
+{
+	struct ubuf_info *uarg = skb_zcopy(skb);
+
+	if (uarg) {
+		sock_zerocopy_put_abort(uarg);
+		skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+	}
+}
+
 /**
  *	skb_queue_empty - check if a queue is empty
  *	@list: queue head
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 8b13db5163cc..8ad963cdc88c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -287,6 +287,7 @@ struct ucred {
 #define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
 #define MSG_EOF         MSG_FIN
 
+#define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
 #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
 #define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
 					   descriptor received through
diff --git a/include/net/sock.h b/include/net/sock.h
index 0f778d3c4300..fe1a0bc25cd3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -294,6 +294,7 @@ struct sock_common {
   *	@sk_stamp: time stamp of last packet received
   *	@sk_tsflags: SO_TIMESTAMPING socket options
   *	@sk_tskey: counter to disambiguate concurrent tstamp requests
+  *	@sk_zckey: counter to order MSG_ZEROCOPY notifications
   *	@sk_socket: Identd and reporting IO signals
   *	@sk_user_data: RPC layer private data
   *	@sk_frag: cached page frag
@@ -462,6 +463,7 @@ struct sock {
 	u16			sk_tsflags;
 	u8			sk_shutdown;
 	u32			sk_tskey;
+	atomic_t		sk_zckey;
 	struct socket		*sk_socket;
 	void			*sk_user_data;
 #ifdef CONFIG_SECURITY
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 07bdce1f444a..78fdf52d6b2f 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -18,10 +18,13 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP	2
 #define SO_EE_ORIGIN_ICMP6	3
 #define SO_EE_ORIGIN_TXSTATUS	4
+#define SO_EE_ORIGIN_ZEROCOPY	5
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
+#define SO_EE_CODE_ZEROCOPY_COPIED	1
+
 /**
  *	struct scm_timestamping - timestamps exposed through cmsg
  *
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee5647bd91b3..2f3277945d35 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -573,27 +573,12 @@ fault:
 }
 EXPORT_SYMBOL(skb_copy_datagram_from_iter);
 
-/**
- *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
- *	@skb: buffer to copy
- *	@from: the source to copy from
- *
- *	The function will first copy up to headlen, and then pin the userspace
- *	pages and build frags through them.
- *
- *	Returns 0, -EFAULT or -EMSGSIZE.
- */
-int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+			    struct iov_iter *from, size_t length)
 {
-	int len = iov_iter_count(from);
-	int copy = min_t(int, skb_headlen(skb), len);
-	int frag = 0;
+	int frag = skb_shinfo(skb)->nr_frags;
 
-	/* copy up to skb headlen */
-	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
-		return -EFAULT;
-
-	while (iov_iter_count(from)) {
+	while (length && iov_iter_count(from)) {
 		struct page *pages[MAX_SKB_FRAGS];
 		size_t start;
 		ssize_t copied;
@@ -603,18 +588,24 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 		if (frag == MAX_SKB_FRAGS)
 			return -EMSGSIZE;
 
-		copied = iov_iter_get_pages(from, pages, ~0U,
+		copied = iov_iter_get_pages(from, pages, length,
 					    MAX_SKB_FRAGS - frag, &start);
 		if (copied < 0)
 			return -EFAULT;
 
 		iov_iter_advance(from, copied);
+		length -= copied;
 
 		truesize = PAGE_ALIGN(copied + start);
 		skb->data_len += copied;
 		skb->len += copied;
 		skb->truesize += truesize;
-		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+		if (sk && sk->sk_type == SOCK_STREAM) {
+			sk->sk_wmem_queued += truesize;
+			sk_mem_charge(sk, truesize);
+		} else {
+			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+		}
 		while (copied) {
 			int size = min_t(int, copied, PAGE_SIZE - start);
 			skb_fill_page_desc(skb, frag++, pages[n], start, size);
@@ -625,6 +616,28 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(__zerocopy_sg_from_iter);
+
+/**
+ *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
+ *	@skb: buffer to copy
+ *	@from: the source to copy from
+ *
+ *	The function will first copy up to headlen, and then pin the userspace
+ *	pages and build frags through them.
+ *
+ *	Returns 0, -EFAULT or -EMSGSIZE.
+ */
+int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+{
+	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
+
+	/* copy up to skb headlen */
+	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
+		return -EFAULT;
+
+	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
+}
 EXPORT_SYMBOL(zerocopy_sg_from_iter);
 
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a95877a8ac8b..0603e44950da 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -915,6 +915,139 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
+{
+	struct ubuf_info *uarg;
+	struct sk_buff *skb;
+
+	WARN_ON_ONCE(!in_task());
+
+	skb = sock_omalloc(sk, 0, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
+	uarg = (void *)skb->cb;
+
+	uarg->callback = sock_zerocopy_callback;
+	uarg->desc = atomic_inc_return(&sk->sk_zckey) - 1;
+	uarg->zerocopy = 1;
+	atomic_set(&uarg->refcnt, 0);
+	sock_hold(sk);
+
+	return uarg;
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
+
+static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
+{
+	return container_of((void *)uarg, struct sk_buff, cb);
+}
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
+{
+	struct sk_buff *skb = skb_from_uarg(uarg);
+	struct sock_exterr_skb *serr;
+	struct sock *sk = skb->sk;
+	u16 id = uarg->desc;
+
+	if (sock_flag(sk, SOCK_DEAD))
+		goto release;
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = 0;
+	serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
+	serr->ee.ee_data = id;
+	if (!success)
+		serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
+
+	skb_queue_tail(&sk->sk_error_queue, skb);
+	skb = NULL;
+
+	sk->sk_error_report(sk);
+
+release:
+	consume_skb(skb);
+	sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
+
+void sock_zerocopy_put(struct ubuf_info *uarg)
+{
+	if (uarg && atomic_dec_and_test(&uarg->refcnt)) {
+		if (uarg->callback)
+			uarg->callback(uarg, uarg->zerocopy);
+		else
+			consume_skb(skb_from_uarg(uarg));
+	}
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put);
+
+void sock_zerocopy_put_abort(struct ubuf_info *uarg)
+{
+	if (uarg) {
+		struct sock *sk = skb_from_uarg(uarg)->sk;
+
+		atomic_dec(&sk->sk_zckey);
+
+		/* sock_zerocopy_put expects a ref. Most sockets take one per
+		 * skb, which is zero on abort. tcp_sendmsg holds one extra, to
+		 * avoid an skb send inside the main loop triggering uarg free.
+		 */
+		if (sk->sk_type != SOCK_STREAM)
+			atomic_inc(&uarg->refcnt);
+
+		sock_zerocopy_put(uarg);
+	}
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
+
+extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+				   struct iov_iter *from, size_t length);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+			     struct msghdr *msg, int len,
+			     struct ubuf_info *uarg)
+{
+	struct iov_iter orig_iter = msg->msg_iter;
+	int err, orig_len = skb->len;
+
+	err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
+	if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
+		/* Streams do not free skb on error. Reset to prev state. */
+		msg->msg_iter = orig_iter;
+		___pskb_trim(skb, orig_len);
+		return err;
+	}
+
+	skb_zcopy_set(skb, uarg);
+	return skb->len - orig_len;
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
+
+/* unused only until next patch in the series; will remove attribute */
+static int __attribute__((unused))
+	   skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
+			      gfp_t gfp_mask)
+{
+	if (skb_zcopy(orig)) {
+		if (skb_zcopy(nskb)) {
+			/* !gfp_mask callers are verified to !skb_zcopy(nskb) */
+			if (!gfp_mask) {
+				WARN_ON_ONCE(1);
+				return -ENOMEM;
+			}
+			if (skb_uarg(nskb) == skb_uarg(orig))
+				return 0;
+			if (skb_copy_ubufs(nskb, GFP_ATOMIC))
+				return -EIO;
+		}
+		skb_zcopy_set(nskb, skb_uarg(orig));
+	}
+	return 0;
+}
+
 /**
  *	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
  *	@skb: the skb to modify
diff --git a/net/core/sock.c b/net/core/sock.c
index 1261880bdcc8..e8b696858cad 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1670,6 +1670,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		atomic_set(&newsk->sk_drops, 0);
 		newsk->sk_send_head	= NULL;
 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+		atomic_set(&newsk->sk_zckey, 0);
 
 		sock_reset_flag(newsk, SOCK_DONE);
 
@@ -2722,6 +2723,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
 
 	sk->sk_stamp = SK_DEFAULT_STAMP;
+	atomic_set(&sk->sk_zckey, 0);
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id		=	0;
-- 
cgit v1.2.3


From 4ebc1e3cfcd8778e2150bdb799b19e85348b8efa Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 4 Aug 2017 14:28:57 +0200
Subject: net: sched: remove unneeded tcf_em_tree_change

Since tcf_em_tree_validate could be always called on a newly created
filter, there is no need for this change function.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h  | 21 ---------------------
 net/sched/cls_basic.c  |  4 +---
 net/sched/cls_cgroup.c |  4 +---
 net/sched/cls_flow.c   | 12 +++++-------
 4 files changed, 7 insertions(+), 34 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 537d0a0ad4c4..f4462ec8b2f4 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -332,26 +332,6 @@ int tcf_em_tree_dump(struct sk_buff *, struct tcf_ematch_tree *, int);
 int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
 			struct tcf_pkt_info *);
 
-/**
- * tcf_em_tree_change - replace ematch tree of a running classifier
- *
- * @tp: classifier kind handle
- * @dst: destination ematch tree variable
- * @src: source ematch tree (temporary tree from tcf_em_tree_validate)
- *
- * This functions replaces the ematch tree in @dst with the ematch
- * tree in @src. The classifier in charge of the ematch tree may be
- * running.
- */
-static inline void tcf_em_tree_change(struct tcf_proto *tp,
-				      struct tcf_ematch_tree *dst,
-				      struct tcf_ematch_tree *src)
-{
-	tcf_tree_lock(tp);
-	memcpy(dst, src, sizeof(*dst));
-	tcf_tree_unlock(tp);
-}
-
 /**
  * tcf_em_tree_match - evaulate an ematch tree
  *
@@ -386,7 +366,6 @@ struct tcf_ematch_tree {
 #define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0)
 #define tcf_em_tree_destroy(t) do { (void)(t); } while(0)
 #define tcf_em_tree_dump(skb, t, tlv) (0)
-#define tcf_em_tree_change(tp, dst, src) do { } while(0)
 #define tcf_em_tree_match(skb, t, info) ((void)(info), 1)
 
 #endif /* CONFIG_NET_EMATCH */
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index c4fd63a068f9..979cd2683b46 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -130,7 +130,6 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 {
 	int err;
 	struct tcf_exts e;
-	struct tcf_ematch_tree t;
 
 	err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
 	if (err < 0)
@@ -139,7 +138,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 	if (err < 0)
 		goto errout;
 
-	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
+	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches);
 	if (err < 0)
 		goto errout;
 
@@ -149,7 +148,6 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 	}
 
 	tcf_exts_change(tp, &f->exts, &e);
-	tcf_em_tree_change(tp, &f->ematches, &t);
 	f->tp = tp;
 
 	return 0;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 12ce547eea04..ce7d38beab95 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -76,7 +76,6 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 	struct nlattr *tb[TCA_CGROUP_MAX + 1];
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 	struct cls_cgroup_head *new;
-	struct tcf_ematch_tree t;
 	struct tcf_exts e;
 	int err;
 
@@ -112,14 +111,13 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 		goto errout;
 	}
 
-	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches);
 	if (err < 0) {
 		tcf_exts_destroy(&e);
 		goto errout;
 	}
 
 	tcf_exts_change(tp, &new->exts, &e);
-	tcf_em_tree_change(tp, &new->ematches, &t);
 
 	rcu_assign_pointer(tp->root, new);
 	if (head)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 3065752b9cda..71fd1af01726 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -389,7 +389,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_FLOW_MAX + 1];
 	struct tcf_exts e;
-	struct tcf_ematch_tree t;
 	unsigned int nkeys = 0;
 	unsigned int perturb_period = 0;
 	u32 baseclass = 0;
@@ -432,13 +431,13 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto err1;
 
-	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
-	if (err < 0)
-		goto err1;
-
 	err = -ENOBUFS;
 	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
 	if (!fnew)
+		goto err1;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches);
+	if (err < 0)
 		goto err2;
 
 	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
@@ -512,7 +511,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 			       (unsigned long)fnew);
 
 	tcf_exts_change(tp, &fnew->exts, &e);
-	tcf_em_tree_change(tp, &fnew->ematches, &t);
 
 	netif_keep_dst(qdisc_dev(tp->q));
 
@@ -554,8 +552,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 
 err3:
 	tcf_exts_destroy(&fnew->exts);
+	tcf_em_tree_destroy(&fnew->ematches);
 err2:
-	tcf_em_tree_destroy(&t);
 	kfree(fnew);
 err1:
 	tcf_exts_destroy(&e);
-- 
cgit v1.2.3


From 3bcc0cec818fa969fe555b44443347211ed787a3 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 4 Aug 2017 14:28:58 +0200
Subject: net: sched: change names of action number helpers to be aligned with
 the rest

The rest of the helpers are named tcf_exts_*, so change the name of
the action number helpers to be aligned. While at it, change to inline
functions.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c  |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  4 +--
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |  2 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  |  2 +-
 drivers/net/ethernet/netronome/nfp/bpf/offload.c   |  4 +--
 include/net/pkt_cls.h                              | 36 ++++++++++++++++------
 net/dsa/slave.c                                    |  2 +-
 net/sched/cls_api.c                                |  2 +-
 9 files changed, 37 insertions(+), 19 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index ef06ce8247ab..6f734c52ef25 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -96,7 +96,7 @@ static int fill_action_fields(struct adapter *adap,
 	LIST_HEAD(actions);
 
 	exts = cls->knode.exts;
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	tcf_exts_to_list(exts, &actions);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 96606e3eb965..091fcc7e6e43 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8953,7 +8953,7 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
 	LIST_HEAD(actions);
 	int err;
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	tcf_exts_to_list(exts, &actions);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3c536f560dd2..78f50d9f621d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1326,7 +1326,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	LIST_HEAD(actions);
 	int err;
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
@@ -1839,7 +1839,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	bool encap = false;
 	int err = 0;
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	memset(attr, 0, sizeof(*attr));
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 88b668ba0d8a..66d511d45c25 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1626,7 +1626,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 	LIST_HEAD(actions);
 	int err;
 
-	if (!tc_single_action(cls->exts)) {
+	if (!tcf_exts_has_one_action(cls->exts)) {
 		netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n");
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 400ad4081660..9be48d2e43ca 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -53,7 +53,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 	LIST_HEAD(actions);
 	int err;
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return 0;
 
 	/* Count action is inserted first */
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 78d80a364edb..a88bb5bc0082 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -115,14 +115,14 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
 
 	/* TC direct action */
 	if (cls_bpf->exts_integrated) {
-		if (tc_no_actions(cls_bpf->exts))
+		if (!tcf_exts_has_actions(cls_bpf->exts))
 			return NN_ACT_DIRECT;
 
 		return -EOPNOTSUPP;
 	}
 
 	/* TC legacy mode */
-	if (!tc_single_action(cls_bpf->exts))
+	if (!tcf_exts_has_one_action(cls_bpf->exts))
 		return -EOPNOTSUPP;
 
 	tcf_exts_to_list(cls_bpf->exts, &actions);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index f4462ec8b2f4..7f2563636df0 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -199,17 +199,35 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 	return 0;
 }
 
+/**
+ * tcf_exts_has_actions - check if at least one action is present
+ * @exts: tc filter extensions handle
+ *
+ * Returns true if at least one action is present.
+ */
+static inline bool tcf_exts_has_actions(struct tcf_exts *exts)
+{
 #ifdef CONFIG_NET_CLS_ACT
+	return exts->nr_actions;
+#else
+	return false;
+#endif
+}
 
-#define tc_no_actions(_exts)  ((_exts)->nr_actions == 0)
-#define tc_single_action(_exts) ((_exts)->nr_actions == 1)
-
-#else /* CONFIG_NET_CLS_ACT */
-
-#define tc_no_actions(_exts) true
-#define tc_single_action(_exts) false
-
-#endif /* CONFIG_NET_CLS_ACT */
+/**
+ * tcf_exts_has_one_action - check if exactly one action is present
+ * @exts: tc filter extensions handle
+ *
+ * Returns true if exactly one action is present.
+ */
+static inline bool tcf_exts_has_one_action(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	return exts->nr_actions == 1;
+#else
+	return false;
+#endif
+}
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index e196562035b1..83252e8426d7 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -779,7 +779,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
 	if (!ds->ops->port_mirror_add)
 		return err;
 
-	if (!tc_single_action(cls->exts))
+	if (!tcf_exts_has_one_action(cls->exts))
 		return err;
 
 	tcf_exts_to_list(cls->exts, &actions);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 39da0c5801c9..287ae6cbf73b 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -972,7 +972,7 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
 	const struct tc_action *a;
 	LIST_HEAD(actions);
 
-	if (tc_no_actions(exts))
+	if (!tcf_exts_has_actions(exts))
 		return -EINVAL;
 
 	tcf_exts_to_list(exts, &actions);
-- 
cgit v1.2.3


From af69afc551eb9f9b1f2cc3295e2dfcdaa1dc948d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 4 Aug 2017 14:28:59 +0200
Subject: net: sched: use tcf_exts_has_actions in tcf_exts_exec

Use the tcf_exts_has_actions helper instead or directly testing
exts->nr_actions in tcf_exts_exec.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 7f2563636df0..322a2823cc6a 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -176,29 +176,6 @@ tcf_exts_stats_update(const struct tcf_exts *exts,
 #endif
 }
 
-/**
- * tcf_exts_exec - execute tc filter extensions
- * @skb: socket buffer
- * @exts: tc filter extensions handle
- * @res: desired result
- *
- * Executes all configured extensions. Returns 0 on a normal execution,
- * a negative number if the filter must be considered unmatched or
- * a positive action code (TC_ACT_*) which must be returned to the
- * underlying layer.
- */
-static inline int
-tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
-	       struct tcf_result *res)
-{
-#ifdef CONFIG_NET_CLS_ACT
-	if (exts->nr_actions)
-		return tcf_action_exec(skb, exts->actions, exts->nr_actions,
-				       res);
-#endif
-	return 0;
-}
-
 /**
  * tcf_exts_has_actions - check if at least one action is present
  * @exts: tc filter extensions handle
@@ -229,6 +206,29 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts)
 #endif
 }
 
+/**
+ * tcf_exts_exec - execute tc filter extensions
+ * @skb: socket buffer
+ * @exts: tc filter extensions handle
+ * @res: desired result
+ *
+ * Executes all configured extensions. Returns 0 on a normal execution,
+ * a negative number if the filter must be considered unmatched or
+ * a positive action code (TC_ACT_*) which must be returned to the
+ * underlying layer.
+ */
+static inline int
+tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
+	      struct tcf_result *res)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (tcf_exts_has_actions(exts))
+		return tcf_action_exec(skb, exts->actions, exts->nr_actions,
+				       res);
+#endif
+	return 0;
+}
+
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
 		      struct tcf_exts *exts, bool ovr);
-- 
cgit v1.2.3


From 6fc6d06e5371507e68c6904a3423622b0e465b64 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 4 Aug 2017 14:29:00 +0200
Subject: net: sched: remove redundant helpers tcf_exts_is_predicative and
 tcf_exts_is_available

These two helpers are doing the same as tcf_exts_has_actions, so remove
them and use tcf_exts_has_actions instead.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h   | 30 ------------------------------
 net/sched/cls_fw.c      |  2 +-
 net/sched/cls_route.c   |  2 +-
 net/sched/cls_tcindex.c |  2 +-
 4 files changed, 3 insertions(+), 33 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 322a2823cc6a..817badf733b5 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -113,36 +113,6 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
 	return 0;
 }
 
-/**
- * tcf_exts_is_predicative - check if a predicative extension is present
- * @exts: tc filter extensions handle
- *
- * Returns 1 if a predicative extension is present, i.e. an extension which
- * might cause further actions and thus overrule the regular tcf_result.
- */
-static inline int
-tcf_exts_is_predicative(struct tcf_exts *exts)
-{
-#ifdef CONFIG_NET_CLS_ACT
-	return exts->nr_actions;
-#else
-	return 0;
-#endif
-}
-
-/**
- * tcf_exts_is_available - check if at least one extension is present
- * @exts: tc filter extensions handle
- *
- * Returns 1 if at least one extension is present.
- */
-static inline int
-tcf_exts_is_available(struct tcf_exts *exts)
-{
-	/* All non-predicative extensions must be added here. */
-	return tcf_exts_is_predicative(exts);
-}
-
 static inline void tcf_exts_to_list(const struct tcf_exts *exts,
 				    struct list_head *actions)
 {
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index d3885362e017..a53fa75dfc7b 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -387,7 +387,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
 	t->tcm_handle = f->id;
 
-	if (!f->res.classid && !tcf_exts_is_available(&f->exts))
+	if (!f->res.classid && !tcf_exts_has_actions(&f->exts))
 		return skb->len;
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d63d5502ee02..26f863634862 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -113,7 +113,7 @@ static inline int route4_hash_wild(void)
 #define ROUTE4_APPLY_RESULT()					\
 {								\
 	*res = f->res;						\
-	if (tcf_exts_is_available(&f->exts)) {			\
+	if (tcf_exts_has_actions(&f->exts)) {			\
 		int r = tcf_exts_exec(skb, &f->exts, res);	\
 		if (r < 0) {					\
 			dont_cache = 1;				\
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 8a8a58357c39..66924d147e97 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -52,7 +52,7 @@ struct tcindex_data {
 
 static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
 {
-	return tcf_exts_is_predicative(&r->exts) || r->res.classid;
+	return tcf_exts_has_actions(&r->exts) || r->res.classid;
 }
 
 static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
-- 
cgit v1.2.3


From af089e701adfb5898fee00a56ea4bb421edc308d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 4 Aug 2017 14:29:01 +0200
Subject: net: sched: fix return value of tcf_exts_exec

Return the defined TC_ACT_OK instead of 0.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 817badf733b5..61ce521688b2 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -182,7 +182,7 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts)
  * @exts: tc filter extensions handle
  * @res: desired result
  *
- * Executes all configured extensions. Returns 0 on a normal execution,
+ * Executes all configured extensions. Returns TC_ACT_OK on a normal execution,
  * a negative number if the filter must be considered unmatched or
  * a positive action code (TC_ACT_*) which must be returned to the
  * underlying layer.
@@ -196,7 +196,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 		return tcf_action_exec(skb, exts->actions, exts->nr_actions,
 				       res);
 #endif
-	return 0;
+	return TC_ACT_OK;
 }
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
-- 
cgit v1.2.3


From ec1a9cca0e13391167567964fd04e61a39d6a4ae Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 4 Aug 2017 14:29:02 +0200
Subject: net: sched: remove check for number of actions in tcf_exts_exec

Leave it to tcf_action_exec to return TC_ACT_OK in case there is no
action present.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 4 +---
 net/sched/act_api.c   | 3 ++-
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 61ce521688b2..b8959c9a190d 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -192,9 +192,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 	      struct tcf_result *res)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (tcf_exts_has_actions(exts))
-		return tcf_action_exec(skb, exts->actions, exts->nr_actions,
-				       res);
+	return tcf_action_exec(skb, exts->actions, exts->nr_actions, res);
 #endif
 	return TC_ACT_OK;
 }
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f19b118df414..a2915d958279 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -473,9 +473,10 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res)
 {
-	int ret = -1, i;
 	u32 jmp_prgcnt = 0;
 	u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */
+	int i;
+	int ret = TC_ACT_OK;
 
 	if (skb_skip_tc_classify(skb))
 		return TC_ACT_OK;
-- 
cgit v1.2.3


From 9b0d4446b56904b59ae3809913b0ac760fa941a6 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 4 Aug 2017 14:29:15 +0200
Subject: net: sched: avoid atomic swap in tcf_exts_change

tcf_exts_change is always called on newly created exts, which are not used
on fastpath. Therefore, simple struct copy is enough.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h   |  3 +--
 net/sched/cls_api.c     | 10 ++--------
 net/sched/cls_rsvp.h    |  4 ++--
 net/sched/cls_tcindex.c |  6 +++---
 4 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index b8959c9a190d..e0c54f111467 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -201,8 +201,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
 		      struct tcf_exts *exts, bool ovr);
 void tcf_exts_destroy(struct tcf_exts *exts);
-void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
-		     struct tcf_exts *src);
+void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 735d556a5283..e655221c654e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -883,18 +883,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 }
 EXPORT_SYMBOL(tcf_exts_validate);
 
-void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
-		     struct tcf_exts *src)
+void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_exts old = *dst;
 
-	tcf_tree_lock(tp);
-	dst->nr_actions = src->nr_actions;
-	dst->actions = src->actions;
-	dst->type = src->type;
-	tcf_tree_unlock(tp);
-
+	*dst = *src;
 	tcf_exts_destroy(&old);
 #endif
 }
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 0d9d07798699..4adb67a73491 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -518,7 +518,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 			tcf_bind_filter(tp, &n->res, base);
 		}
 
-		tcf_exts_change(tp, &n->exts, &e);
+		tcf_exts_change(&n->exts, &e);
 		rsvp_replace(tp, n, handle);
 		return 0;
 	}
@@ -591,7 +591,7 @@ insert:
 			if (f->tunnelhdr == 0)
 				tcf_bind_filter(tp, &f->res, base);
 
-			tcf_exts_change(tp, &f->exts, &e);
+			tcf_exts_change(&f->exts, &e);
 
 			fp = &s->ht[h2];
 			for (nfp = rtnl_dereference(*fp); nfp;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 66924d147e97..d69f828f3fed 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -419,9 +419,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	}
 
 	if (old_r)
-		tcf_exts_change(tp, &r->exts, &e);
+		tcf_exts_change(&r->exts, &e);
 	else
-		tcf_exts_change(tp, &cr.exts, &e);
+		tcf_exts_change(&cr.exts, &e);
 
 	if (old_r && old_r != r) {
 		err = tcindex_filter_result_init(old_r);
@@ -439,7 +439,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		struct tcindex_filter *nfp;
 		struct tcindex_filter __rcu **fp;
 
-		tcf_exts_change(tp, &f->result.exts, &r->exts);
+		tcf_exts_change(&f->result.exts, &r->exts);
 
 		fp = cp->h + (handle % cp->hash);
 		for (nfp = rtnl_dereference(*fp);
-- 
cgit v1.2.3


From 91ed1e666a4ea2e260452a7d7d311ac5ae852cba Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 3 Aug 2017 18:07:06 +0200
Subject: ip/options: explicitly provide net ns to __ip_options_echo()

__ip_options_echo() uses the current network namespace, and
currently retrives it via skb->dst->dev.

This commit adds an explicit 'net' argument to __ip_options_echo()
and update all the call sites to provide it, usually via a simpler
sock_net().

After this change, __ip_options_echo() no more needs to access
skb->dst and we can drop a couple of hack to preserve such
info in the rx path.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       | 9 +++++----
 include/net/tcp.h      | 5 +++--
 net/ipv4/icmp.c        | 4 ++--
 net/ipv4/ip_options.c  | 6 +++---
 net/ipv4/ip_output.c   | 2 +-
 net/ipv4/ip_sockglue.c | 7 ++++---
 net/ipv4/syncookies.c  | 2 +-
 net/ipv4/tcp_ipv4.c    | 2 +-
 8 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 821cedcc8e73..9e59dcf1787a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -567,11 +567,12 @@ int ip_forward(struct sk_buff *skb);
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
 		      __be32 daddr, struct rtable *rt, int is_frag);
 
-int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
-		      const struct ip_options *sopt);
-static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+int __ip_options_echo(struct net *net, struct ip_options *dopt,
+		      struct sk_buff *skb, const struct ip_options *sopt);
+static inline int ip_options_echo(struct net *net, struct ip_options *dopt,
+				  struct sk_buff *skb)
 {
-	return __ip_options_echo(dopt, skb, &IPCB(skb)->opt);
+	return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt);
 }
 
 void ip_options_fragment(struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bb1881b4ce48..5173fecde495 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1885,7 +1885,8 @@ extern void tcp_rack_reo_timeout(struct sock *sk);
 /*
  * Save and compile IPv4 options, return a pointer to it
  */
-static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
+static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net,
+							 struct sk_buff *skb)
 {
 	const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
 	struct ip_options_rcu *dopt = NULL;
@@ -1894,7 +1895,7 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
 		int opt_size = sizeof(*dopt) + opt->optlen;
 
 		dopt = kmalloc(opt_size, GFP_ATOMIC);
-		if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
+		if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
 			kfree(dopt);
 			dopt = NULL;
 		}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c2be26b98b5f..681e33998e03 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -412,7 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	int type = icmp_param->data.icmph.type;
 	int code = icmp_param->data.icmph.code;
 
-	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
+	if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
 		return;
 
 	/* Needed by both icmp_global_allow and icmp_xmit_lock */
@@ -694,7 +694,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					  iph->tos;
 	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
-	if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
+	if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
 		goto out_unlock;
 
 
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index fdda97308c0b..525ae88d1e58 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -86,8 +86,8 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
  * NOTE: dopt cannot point to skb.
  */
 
-int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
-		      const struct ip_options *sopt)
+int __ip_options_echo(struct net *net, struct ip_options *dopt,
+		      struct sk_buff *skb, const struct ip_options *sopt)
 {
 	unsigned char *sptr, *dptr;
 	int soffset, doffset;
@@ -140,7 +140,7 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
 						__be32 addr;
 
 						memcpy(&addr, dptr+soffset-1, 4);
-						if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
+						if (inet_addr_type(net, addr) != RTN_UNICAST) {
 							dopt->ts_needtime = 1;
 							soffset += 8;
 						}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b631ec685d77..73b0b15245b6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1525,7 +1525,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	int err;
 	int oif;
 
-	if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
+	if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
 		return;
 
 	ipc.addr = daddr;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ecc4b4a2413e..1c3354d028a4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -80,7 +80,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
 }
 
 
-static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
+static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
+				 struct sk_buff *skb)
 {
 	unsigned char optbuf[sizeof(struct ip_options) + 40];
 	struct ip_options *opt = (struct ip_options *)optbuf;
@@ -88,7 +89,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
 	if (IPCB(skb)->opt.optlen == 0)
 		return;
 
-	if (ip_options_echo(opt, skb)) {
+	if (ip_options_echo(net, opt, skb)) {
 		msg->msg_flags |= MSG_CTRUNC;
 		return;
 	}
@@ -204,7 +205,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
 	}
 
 	if (flags & IP_CMSG_RETOPTS) {
-		ip_cmsg_recv_retopts(msg, skb);
+		ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
 
 		flags &= ~IP_CMSG_RETOPTS;
 		if (!flags)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 03ad8778c395..b1bb1b3a1082 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
 	 */
-	ireq->opt = tcp_v4_save_options(skb);
+	ireq->opt = tcp_v4_save_options(sock_net(sk), skb);
 
 	if (security_inet_conn_request(sk, skb, req)) {
 		reqsk_free(req);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9b51663cd5a4..5f708c85110e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1267,7 +1267,7 @@ static void tcp_v4_init_req(struct request_sock *req,
 
 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
-	ireq->opt = tcp_v4_save_options(skb);
+	ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb);
 }
 
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
-- 
cgit v1.2.3


From 233e7936c84b7d7dd90c10e2ba27abb5ab42956f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 19:59:51 +0800
Subject: sctp: remove the typedef sctp_lower_cwnd_t

This patch is to remove the typedef sctp_lower_cwnd_t, and
replace with enum sctp_lower_cwnd in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h | 4 ++--
 include/net/sctp/structs.h   | 3 ++-
 net/sctp/transport.c         | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 9b18044c551e..761064ee3ac5 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -378,12 +378,12 @@ typedef enum {
 } sctp_retransmit_reason_t;
 
 /* Reasons to lower cwnd. */
-typedef enum {
+enum sctp_lower_cwnd {
 	SCTP_LOWER_CWND_T3_RTX,
 	SCTP_LOWER_CWND_FAST_RTX,
 	SCTP_LOWER_CWND_ECNE,
 	SCTP_LOWER_CWND_INACTIVE,
-} sctp_lower_cwnd_t;
+};
 
 
 /* SCTP-AUTH Necessary constants */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 66cd7639b912..53802d8872d7 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -950,7 +950,8 @@ int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
 void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
-void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t);
+void sctp_transport_lower_cwnd(struct sctp_transport *t,
+			       enum sctp_lower_cwnd reason);
 void sctp_transport_burst_limited(struct sctp_transport *);
 void sctp_transport_burst_reset(struct sctp_transport *);
 unsigned long sctp_transport_timeout(struct sctp_transport *);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 80a97c8501a7..2d9bd3776bc8 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -490,7 +490,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
  * detected.
  */
 void sctp_transport_lower_cwnd(struct sctp_transport *transport,
-			       sctp_lower_cwnd_t reason)
+			       enum sctp_lower_cwnd reason)
 {
 	struct sctp_association *asoc = transport->asoc;
 
-- 
cgit v1.2.3


From 125c29820252bfa5bf8081e75618e4ee7e9487da Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 19:59:52 +0800
Subject: sctp: remove the typedef sctp_retransmit_reason_t

This patch is to remove the typedef sctp_retransmit_reason_t, and
replace with enum sctp_retransmit_reason in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h | 4 ++--
 include/net/sctp/structs.h   | 4 ++--
 net/sctp/outqueue.c          | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 761064ee3ac5..922fba5880d6 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -370,12 +370,12 @@ typedef enum {
 						   peer */
 
 /* Reasons to retransmit. */
-typedef enum {
+enum sctp_retransmit_reason {
 	SCTP_RTXR_T3_RTX,
 	SCTP_RTXR_FAST_RTX,
 	SCTP_RTXR_PMTUD,
 	SCTP_RTXR_T1_RTX,
-} sctp_retransmit_reason_t;
+};
 
 /* Reasons to lower cwnd. */
 enum sctp_lower_cwnd {
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 53802d8872d7..5e872acee6e3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1054,8 +1054,8 @@ int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
 int sctp_outq_is_empty(const struct sctp_outq *);
 void sctp_outq_restart(struct sctp_outq *);
 
-void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
-		     sctp_retransmit_reason_t);
+void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
+		     enum sctp_retransmit_reason reason);
 void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
 void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
 void sctp_prsctp_prune(struct sctp_association *asoc,
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index d2a8adfd4a6f..08ee0ed9a0c6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -534,7 +534,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
  * one packet out.
  */
 void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
-		     sctp_retransmit_reason_t reason)
+		     enum sctp_retransmit_reason reason)
 {
 	struct net *net = sock_net(q->asoc->base.sk);
 
-- 
cgit v1.2.3


From 701ef3e6c74be771a76be39817941e68e7228644 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 19:59:53 +0800
Subject: sctp: remove the typedef sctp_scope_policy_t

This patch is to remove the typedef sctp_scope_policy_t and keep
it's members as an anonymous enum.

It is also to define SCTP_SCOPE_POLICY_MAX to replace the num 3
in sysctl.c to make codes clear.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h | 6 ++++--
 net/sctp/sysctl.c            | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 922fba5880d6..acb03eb64842 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -341,12 +341,14 @@ typedef enum {
 	SCTP_SCOPE_UNUSABLE,		/* IPv4 unusable addresses */
 } sctp_scope_t;
 
-typedef enum {
+enum {
 	SCTP_SCOPE_POLICY_DISABLE,	/* Disable IPv4 address scoping */
 	SCTP_SCOPE_POLICY_ENABLE,	/* Enable IPv4 address scoping */
 	SCTP_SCOPE_POLICY_PRIVATE,	/* Follow draft but allow IPv4 private addresses */
 	SCTP_SCOPE_POLICY_LINK,		/* Follow draft but allow IPv4 link local addresses */
-} sctp_scope_policy_t;
+};
+
+#define SCTP_SCOPE_POLICY_MAX	SCTP_SCOPE_POLICY_LINK
 
 /* Based on IPv4 scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>,
  * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24,
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 0e732f68c2bf..ef7ca44d6e6a 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -46,7 +46,7 @@ static int timer_max = 86400000; /* ms in one day */
 static int int_max = INT_MAX;
 static int sack_timer_min = 1;
 static int sack_timer_max = 500;
-static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
+static int addr_scope_max = SCTP_SCOPE_POLICY_MAX;
 static int rwnd_scale_max = 16;
 static int rto_alpha_min = 0;
 static int rto_beta_min = 0;
-- 
cgit v1.2.3


From 1c662018d2d41ecc5550cbd74d29d2d32c164ed3 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 19:59:54 +0800
Subject: sctp: remove the typedef sctp_scope_t

This patch is to remove the typedef sctp_scope_t, and
replace with enum sctp_scope in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h |  4 ++--
 include/net/sctp/sctp.h      |  4 ++--
 include/net/sctp/structs.h   | 17 +++++++++--------
 net/sctp/associola.c         | 17 ++++++++---------
 net/sctp/bind_addr.c         | 20 ++++++++++----------
 net/sctp/ipv6.c              |  6 +++---
 net/sctp/protocol.c          |  6 +++---
 net/sctp/sm_make_chunk.c     |  6 +++---
 net/sctp/socket.c            |  4 ++--
 9 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index acb03eb64842..0503bb70e5d9 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -333,13 +333,13 @@ typedef enum {
  * At this point, the IPv6 scopes will be mapped to these internal scopes
  * as much as possible.
  */
-typedef enum {
+enum sctp_scope {
 	SCTP_SCOPE_GLOBAL,		/* IPv4 global addresses */
 	SCTP_SCOPE_PRIVATE,		/* IPv4 private addresses */
 	SCTP_SCOPE_LINK,		/* IPv4 link local address */
 	SCTP_SCOPE_LOOPBACK,		/* IPv4 loopback address */
 	SCTP_SCOPE_UNUSABLE,		/* IPv4 unusable addresses */
-} sctp_scope_t;
+};
 
 enum {
 	SCTP_SCOPE_POLICY_DISABLE,	/* Disable IPv4 address scoping */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 84650fed1e6a..ca66b033ec38 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -94,8 +94,8 @@
 /*
  * sctp/protocol.c
  */
-int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *,
-			      sctp_scope_t, gfp_t gfp, int flags);
+int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr,
+			      enum sctp_scope, gfp_t gfp, int flags);
 struct sctp_pf *sctp_get_pf_specific(sa_family_t family);
 int sctp_register_pf(struct sctp_pf *, sa_family_t);
 void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 5e872acee6e3..d771d418481f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -449,7 +449,7 @@ struct sctp_af {
 	int		(*addr_valid)	(union sctp_addr *,
 					 struct sctp_sock *,
 					 const struct sk_buff *);
-	sctp_scope_t	(*scope) (union sctp_addr *);
+	enum sctp_scope	(*scope)(union sctp_addr *);
 	void		(*inaddr_any)	(union sctp_addr *, __be16);
 	int		(*is_any)	(const union sctp_addr *);
 	int		(*available)	(union sctp_addr *,
@@ -1111,7 +1111,7 @@ void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port);
 void sctp_bind_addr_free(struct sctp_bind_addr *);
 int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
 			const struct sctp_bind_addr *src,
-			sctp_scope_t scope, gfp_t gfp,
+			enum sctp_scope scope, gfp_t gfp,
 			int flags);
 int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
 			const struct sctp_bind_addr *src,
@@ -1135,8 +1135,9 @@ union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp,
 int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len,
 			   __u16 port, gfp_t gfp);
 
-sctp_scope_t sctp_scope(const union sctp_addr *);
-int sctp_in_scope(struct net *net, const union sctp_addr *addr, const sctp_scope_t scope);
+enum sctp_scope sctp_scope(const union sctp_addr *addr);
+int sctp_in_scope(struct net *net, const union sctp_addr *addr,
+		  const enum sctp_scope scope);
 int sctp_is_any(struct sock *sk, const union sctp_addr *addr);
 int sctp_is_ep_boundall(struct sock *sk);
 
@@ -1925,8 +1926,8 @@ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base)
 
 
 struct sctp_association *
-sctp_association_new(const struct sctp_endpoint *, const struct sock *,
-		     sctp_scope_t scope, gfp_t gfp);
+sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk,
+		     enum sctp_scope scope, gfp_t gfp);
 void sctp_association_free(struct sctp_association *);
 void sctp_association_put(struct sctp_association *);
 void sctp_association_hold(struct sctp_association *);
@@ -1967,8 +1968,8 @@ void sctp_assoc_set_primary(struct sctp_association *,
 			    struct sctp_transport *);
 void sctp_assoc_del_nonprimary_peers(struct sctp_association *,
 				    struct sctp_transport *);
-int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *,
-				     sctp_scope_t, gfp_t);
+int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
+				     enum sctp_scope scope, gfp_t gfp);
 int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *,
 					 struct sctp_cookie*,
 					 gfp_t gfp);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 40ec83679d6e..4c1f1bb2aaad 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -63,11 +63,11 @@ static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
 /* 1st Level Abstractions. */
 
 /* Initialize a new association from provided memory. */
-static struct sctp_association *sctp_association_init(struct sctp_association *asoc,
-					  const struct sctp_endpoint *ep,
-					  const struct sock *sk,
-					  sctp_scope_t scope,
-					  gfp_t gfp)
+static struct sctp_association *sctp_association_init(
+					struct sctp_association *asoc,
+					const struct sctp_endpoint *ep,
+					const struct sock *sk,
+					enum sctp_scope scope, gfp_t gfp)
 {
 	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
@@ -301,9 +301,8 @@ fail_init:
 
 /* Allocate and initialize a new association */
 struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
-					 const struct sock *sk,
-					 sctp_scope_t scope,
-					 gfp_t gfp)
+					      const struct sock *sk,
+					      enum sctp_scope scope, gfp_t gfp)
 {
 	struct sctp_association *asoc;
 
@@ -1564,7 +1563,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
  * local endpoint and the remote peer.
  */
 int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
-				     sctp_scope_t scope, gfp_t gfp)
+				     enum sctp_scope scope, gfp_t gfp)
 {
 	int flags;
 
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 1ebc184a0e23..7df3704982f5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -45,9 +45,9 @@
 #include <net/sctp/sm.h>
 
 /* Forward declarations for internal helpers. */
-static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *,
-			      union sctp_addr *, sctp_scope_t scope, gfp_t gfp,
-			      int flags);
+static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
+			      union sctp_addr *addr, enum sctp_scope scope,
+			      gfp_t gfp, int flags);
 static void sctp_bind_addr_clean(struct sctp_bind_addr *);
 
 /* First Level Abstractions. */
@@ -57,7 +57,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
  */
 int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
 			const struct sctp_bind_addr *src,
-			sctp_scope_t scope, gfp_t gfp,
+			enum sctp_scope scope, gfp_t gfp,
 			int flags)
 {
 	struct sctp_sockaddr_entry *addr;
@@ -440,9 +440,8 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr	*bp,
 
 /* Copy out addresses from the global local address list. */
 static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
-			      union sctp_addr *addr,
-			      sctp_scope_t scope, gfp_t gfp,
-			      int flags)
+			      union sctp_addr *addr, enum sctp_scope scope,
+			      gfp_t gfp, int flags)
 {
 	int error = 0;
 
@@ -485,9 +484,10 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
 }
 
 /* Is 'addr' valid for 'scope'?  */
-int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope)
+int sctp_in_scope(struct net *net, const union sctp_addr *addr,
+		  enum sctp_scope scope)
 {
-	sctp_scope_t addr_scope = sctp_scope(addr);
+	enum sctp_scope addr_scope = sctp_scope(addr);
 
 	/* The unusable SCTP addresses will not be considered with
 	 * any defined scopes.
@@ -545,7 +545,7 @@ int sctp_is_ep_boundall(struct sock *sk)
  ********************************************************************/
 
 /* What is the scope of 'addr'?  */
-sctp_scope_t sctp_scope(const union sctp_addr *addr)
+enum sctp_scope sctp_scope(const union sctp_addr *addr)
 {
 	struct sctp_af *af;
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 107d7c912922..a2a1c1d08d51 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -243,8 +243,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	union sctp_addr *daddr = &t->ipaddr;
 	union sctp_addr dst_saddr;
 	struct in6_addr *final_p, final;
+	enum sctp_scope scope;
 	__u8 matchlen = 0;
-	sctp_scope_t scope;
 
 	memset(fl6, 0, sizeof(struct flowi6));
 	fl6->daddr = daddr->v6.sin6_addr;
@@ -624,10 +624,10 @@ static int sctp_v6_addr_valid(union sctp_addr *addr,
 }
 
 /* What is the scope of 'addr'?  */
-static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v6_scope(union sctp_addr *addr)
 {
+	enum sctp_scope retval;
 	int v6scope;
-	sctp_scope_t retval;
 
 	/* The IPv6 scope is really a set of bit fields.
 	 * See IFA_* in <net/if_inet6.h>.  Map to a generic SCTP scope.
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 852556d67ae3..fcd80feb293f 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -196,7 +196,7 @@ static void sctp_free_local_addr_list(struct net *net)
 
 /* Copy the local addresses which are valid for 'scope' into 'bp'.  */
 int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
-			      sctp_scope_t scope, gfp_t gfp, int copy_flags)
+			      enum sctp_scope scope, gfp_t gfp, int copy_flags)
 {
 	struct sctp_sockaddr_entry *addr;
 	union sctp_addr laddr;
@@ -400,9 +400,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
  * IPv4 scoping can be controlled through sysctl option
  * net.sctp.addr_scope_policy
  */
-static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v4_scope(union sctp_addr *addr)
 {
-	sctp_scope_t retval;
+	enum sctp_scope retval;
 
 	/* Check for unusable SCTP addresses. */
 	if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d17e8d1f2ed9..a034d842e335 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1578,8 +1578,8 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
 					gfp_t gfp)
 {
 	struct sctp_association *asoc;
+	enum sctp_scope scope;
 	struct sk_buff *skb;
-	sctp_scope_t scope;
 
 	/* Create the bare association.  */
 	scope = sctp_scope(sctp_source(chunk));
@@ -1701,7 +1701,7 @@ struct sctp_association *sctp_unpack_cookie(
 	int headersize, bodysize, fixed_size;
 	__u8 *digest = ep->digest;
 	unsigned int len;
-	sctp_scope_t scope;
+	enum sctp_scope scope;
 	struct sk_buff *skb = chunk->skb;
 	ktime_t kt;
 
@@ -2502,7 +2502,7 @@ static int sctp_process_param(struct sctp_association *asoc,
 	int i;
 	__u16 sat;
 	int retval = 1;
-	sctp_scope_t scope;
+	enum sctp_scope scope;
 	u32 stale;
 	struct sctp_af *af;
 	union sctp_addr_param *addr_param;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1db478e34520..a1e2113806dd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1055,7 +1055,7 @@ static int __sctp_connect(struct sock *sk,
 	struct sctp_association *asoc2;
 	struct sctp_transport *transport;
 	union sctp_addr to;
-	sctp_scope_t scope;
+	enum sctp_scope scope;
 	long timeo;
 	int err = 0;
 	int addrcnt = 0;
@@ -1610,7 +1610,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	struct sctp_initmsg *sinit;
 	sctp_assoc_t associd = 0;
 	sctp_cmsgs_t cmsgs = { NULL };
-	sctp_scope_t scope;
+	enum sctp_scope scope;
 	bool fill_sinfo_ttl = false, wait_connect = false;
 	struct sctp_datamsg *datamsg;
 	int msg_flags = msg->msg_flags;
-- 
cgit v1.2.3


From 0ceaeebe28d46e49c865f88a3e3ca75f6cf13e1f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 19:59:55 +0800
Subject: sctp: remove the typedef sctp_transport_cmd_t

This patch is to remove the typedef sctp_transport_cmd_t, and
replace with enum sctp_transport_cmd in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h | 4 ++--
 include/net/sctp/structs.h   | 7 ++++---
 net/sctp/associola.c         | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 0503bb70e5d9..8ce6d3263e41 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -320,11 +320,11 @@ typedef enum {
 } sctp_xmit_t;
 
 /* These are the commands for manipulating transports.  */
-typedef enum {
+enum sctp_transport_cmd {
 	SCTP_TRANSPORT_UP,
 	SCTP_TRANSPORT_DOWN,
 	SCTP_TRANSPORT_PF,
-} sctp_transport_cmd_t;
+};
 
 /* These are the address scopes defined mainly for IPv4 addresses
  * based on draft of SCTP IPv4 scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>.
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index d771d418481f..d098d1c4ed74 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1947,9 +1947,10 @@ void sctp_assoc_del_peer(struct sctp_association *asoc,
 			 const union sctp_addr *addr);
 void sctp_assoc_rm_peer(struct sctp_association *asoc,
 			 struct sctp_transport *peer);
-void sctp_assoc_control_transport(struct sctp_association *,
-				  struct sctp_transport *,
-				  sctp_transport_cmd_t, sctp_sn_error_t);
+void sctp_assoc_control_transport(struct sctp_association *asoc,
+				  struct sctp_transport *transport,
+				  enum sctp_transport_cmd command,
+				  sctp_sn_error_t error);
 struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32);
 struct sctp_transport *sctp_assoc_is_match(struct sctp_association *,
 					   struct net *,
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 4c1f1bb2aaad..b53efed8ff71 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -796,7 +796,7 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
  */
 void sctp_assoc_control_transport(struct sctp_association *asoc,
 				  struct sctp_transport *transport,
-				  sctp_transport_cmd_t command,
+				  enum sctp_transport_cmd command,
 				  sctp_sn_error_t error)
 {
 	struct sctp_ulpevent *event;
-- 
cgit v1.2.3


From 8496561430df6e2c21b4ed37bd93604d3acaf5d6 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 19:59:56 +0800
Subject: sctp: remove the typedef sctp_sock_state_t

This patch is to remove the typedef sctp_sock_state_t, and
replace with enum sctp_sock_state in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h | 6 +++---
 include/net/sctp/sctp.h      | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 8ce6d3263e41..049868ea7ae9 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -42,7 +42,7 @@
 
 #include <linux/sctp.h>
 #include <linux/ipv6.h> /* For ipv6hdr. */
-#include <net/tcp_states.h>  /* For TCP states used in sctp_sock_state_t */
+#include <net/tcp_states.h>  /* For TCP states used in enum sctp_sock_state */
 
 /* Value used for stream negotiation. */
 enum { SCTP_MAX_STREAM = 0xffff };
@@ -214,13 +214,13 @@ typedef enum {
  * - A socket in SCTP_SS_ESTABLISHED state indicates that it has a single 
  *   association.
  */
-typedef enum {
+enum sctp_sock_state {
 	SCTP_SS_CLOSED         = TCP_CLOSE,
 	SCTP_SS_LISTENING      = TCP_LISTEN,
 	SCTP_SS_ESTABLISHING   = TCP_SYN_SENT,
 	SCTP_SS_ESTABLISHED    = TCP_ESTABLISHED,
 	SCTP_SS_CLOSING        = TCP_CLOSE_WAIT,
-} sctp_sock_state_t;
+};
 
 /* These functions map various type to printable names.  */
 const char *sctp_cname(const sctp_subtype_t);	/* chunk types */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index ca66b033ec38..0022bc713434 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -565,7 +565,8 @@ static inline int __sctp_state(const struct sctp_association *asoc,
 
 /* Is the socket in this state? */
 #define sctp_sstate(sk, state) __sctp_sstate((sk), (SCTP_SS_##state))
-static inline int __sctp_sstate(const struct sock *sk, sctp_sock_state_t state)
+static inline int __sctp_sstate(const struct sock *sk,
+				enum sctp_sock_state state)
 {
 	return sk->sk_state == state;
 }
-- 
cgit v1.2.3


From 86b36f2a9b9ea58dd2100b0e6f1f45a1f67ee95e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 19:59:57 +0800
Subject: sctp: remove the typedef sctp_xmit_t

This patch is to remove the typedef sctp_xmit_t, and
replace with enum sctp_xmit in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h |  4 +--
 include/net/sctp/structs.h   |  9 ++++---
 net/sctp/output.c            | 60 ++++++++++++++++++++++----------------------
 net/sctp/outqueue.c          |  8 +++---
 4 files changed, 41 insertions(+), 40 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 049868ea7ae9..311b2e3773e8 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -312,12 +312,12 @@ enum { SCTP_MAX_GABS = 16 };
 /* These return values describe the success or failure of a number of
  * routines which form the lower interface to SCTP_outqueue.
  */
-typedef enum {
+enum sctp_xmit {
 	SCTP_XMIT_OK,
 	SCTP_XMIT_PMTU_FULL,
 	SCTP_XMIT_RWND_FULL,
 	SCTP_XMIT_DELAY,
-} sctp_xmit_t;
+};
 
 /* These are the commands for manipulating transports.  */
 enum sctp_transport_cmd {
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index d098d1c4ed74..6fab67e7f1e5 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -697,10 +697,11 @@ struct sctp_packet {
 void sctp_packet_init(struct sctp_packet *, struct sctp_transport *,
 		      __u16 sport, __u16 dport);
 void sctp_packet_config(struct sctp_packet *, __u32 vtag, int);
-sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *,
-				       struct sctp_chunk *, int, gfp_t);
-sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *,
-                                     struct sctp_chunk *);
+enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
+					  struct sctp_chunk *chunk,
+					  int one_packet, gfp_t gfp);
+enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
+					struct sctp_chunk *chunk);
 int sctp_packet_transmit(struct sctp_packet *, gfp_t);
 void sctp_packet_free(struct sctp_packet *);
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9d8504985744..4a865cd06d76 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -57,15 +57,15 @@
 #include <net/sctp/checksum.h>
 
 /* Forward declarations for private helpers. */
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
-					      struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
-					   struct sctp_chunk *chunk);
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+						 struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+						  struct sctp_chunk *chunk);
 static void sctp_packet_append_data(struct sctp_packet *packet,
-					   struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
-					struct sctp_chunk *chunk,
-					u16 chunk_len);
+				    struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+					   struct sctp_chunk *chunk,
+					   u16 chunk_len);
 
 static void sctp_packet_reset(struct sctp_packet *packet)
 {
@@ -181,11 +181,11 @@ void sctp_packet_free(struct sctp_packet *packet)
  * as it can fit in the packet, but any more data that does not fit in this
  * packet can be sent only after receiving the COOKIE_ACK.
  */
-sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
-				       struct sctp_chunk *chunk,
-				       int one_packet, gfp_t gfp)
+enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
+					  struct sctp_chunk *chunk,
+					  int one_packet, gfp_t gfp)
 {
-	sctp_xmit_t retval;
+	enum sctp_xmit retval;
 
 	pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__,
 		 packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
@@ -218,12 +218,12 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
 }
 
 /* Try to bundle an auth chunk into the packet. */
-static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
-					   struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
+					      struct sctp_chunk *chunk)
 {
 	struct sctp_association *asoc = pkt->transport->asoc;
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 	struct sctp_chunk *auth;
-	sctp_xmit_t retval = SCTP_XMIT_OK;
 
 	/* if we don't have an association, we can't do authentication */
 	if (!asoc)
@@ -254,10 +254,10 @@ static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
 }
 
 /* Try to bundle a SACK with the packet. */
-static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
-					   struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt,
+					      struct sctp_chunk *chunk)
 {
-	sctp_xmit_t retval = SCTP_XMIT_OK;
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 
 	/* If sending DATA and haven't aleady bundled a SACK, try to
 	 * bundle one in to the packet.
@@ -299,11 +299,11 @@ out:
 /* Append a chunk to the offered packet reporting back any inability to do
  * so.
  */
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
-					      struct sctp_chunk *chunk)
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+						 struct sctp_chunk *chunk)
 {
-	sctp_xmit_t retval = SCTP_XMIT_OK;
 	__u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 
 	/* Check to see if this chunk will fit into the packet */
 	retval = sctp_packet_will_fit(packet, chunk, chunk_len);
@@ -353,10 +353,10 @@ finish:
 /* Append a chunk to the offered packet reporting back any inability to do
  * so.
  */
-sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
-				     struct sctp_chunk *chunk)
+enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
+					struct sctp_chunk *chunk)
 {
-	sctp_xmit_t retval = SCTP_XMIT_OK;
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 
 	pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
 
@@ -653,8 +653,8 @@ out:
  ********************************************************************/
 
 /* This private function check to see if a chunk can be added */
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
-					   struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+						  struct sctp_chunk *chunk)
 {
 	size_t datasize, rwnd, inflight, flight_size;
 	struct sctp_transport *transport = packet->transport;
@@ -762,12 +762,12 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
 	sctp_chunk_assign_ssn(chunk);
 }
 
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
-					struct sctp_chunk *chunk,
-					u16 chunk_len)
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+					   struct sctp_chunk *chunk,
+					   u16 chunk_len)
 {
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 	size_t psize, pmtu, maxsize;
-	sctp_xmit_t retval = SCTP_XMIT_OK;
 
 	psize = packet->size;
 	if (packet->transport->asoc)
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 08ee0ed9a0c6..2966ff400755 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -594,14 +594,14 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			       int rtx_timeout, int *start_timer)
 {
-	struct list_head *lqueue;
 	struct sctp_transport *transport = pkt->transport;
-	sctp_xmit_t status;
 	struct sctp_chunk *chunk, *chunk1;
-	int fast_rtx;
+	struct list_head *lqueue;
+	enum sctp_xmit status;
 	int error = 0;
 	int timer = 0;
 	int done = 0;
+	int fast_rtx;
 
 	lqueue = &q->retransmit;
 	fast_rtx = q->fast_rtx;
@@ -781,7 +781,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 	struct sctp_transport *transport = NULL;
 	struct sctp_transport *new_transport;
 	struct sctp_chunk *chunk, *tmp;
-	sctp_xmit_t status;
+	enum sctp_xmit status;
 	int error = 0;
 	int start_timer = 0;
 	int one_packet = 0;
-- 
cgit v1.2.3


From 4785c7ae1848244da3435ba5269f6288c15975c7 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 19:59:58 +0800
Subject: sctp: remove the typedef sctp_ierror_t

This patch is to remove the typedef sctp_ierror_t, and
replace with enum sctp_ierror in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h |  5 ++---
 net/sctp/sm_make_chunk.c     | 23 ++++++++++++-----------
 net/sctp/sm_statefuns.c      | 28 +++++++++++++++-------------
 3 files changed, 29 insertions(+), 27 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 311b2e3773e8..9a694653b708 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -155,8 +155,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	sctp_event_primitive_t,	primitive)
 				- sizeof(struct sctp_data_chunk)))
 
 /* Internal error codes */
-typedef enum {
-
+enum sctp_ierror {
 	SCTP_IERROR_NO_ERROR	        = 0,
 	SCTP_IERROR_BASE		= 1000,
 	SCTP_IERROR_NO_COOKIE,
@@ -177,7 +176,7 @@ typedef enum {
 	SCTP_IERROR_PROTO_VIOLATION,
 	SCTP_IERROR_ERROR,
 	SCTP_IERROR_ABORT,
-} sctp_ierror_t;
+};
 
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index a034d842e335..3a8fb1dffbc1 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2065,10 +2065,11 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
  * 	SCTP_IERROR_ERROR    - stop and report an error.
  * 	SCTP_IERROR_NOMEME   - out of memory.
  */
-static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
-					    union sctp_params param,
-					    struct sctp_chunk *chunk,
-					    struct sctp_chunk **errp)
+static enum sctp_ierror sctp_process_unk_param(
+					const struct sctp_association *asoc,
+					union sctp_params param,
+					struct sctp_chunk *chunk,
+					struct sctp_chunk **errp)
 {
 	int retval = SCTP_IERROR_NO_ERROR;
 
@@ -2117,13 +2118,13 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
  *	SCTP_IERROR_ERROR - stop processing, trigger an ERROR
  * 	SCTP_IERROR_NO_ERROR - continue with the chunk
  */
-static sctp_ierror_t sctp_verify_param(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					union sctp_params param,
-					enum sctp_cid cid,
-					struct sctp_chunk *chunk,
-					struct sctp_chunk **err_chunk)
+static enum sctp_ierror sctp_verify_param(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  union sctp_params param,
+					  enum sctp_cid cid,
+					  struct sctp_chunk *chunk,
+					  struct sctp_chunk **err_chunk)
 {
 	struct sctp_hmac_algo_param *hmacs;
 	int retval = SCTP_IERROR_NO_ERROR;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8af90a5f23cd..5381697333df 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -144,11 +144,12 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 				     void *arg,
 				     sctp_cmd_seq_t *commands);
 
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    struct sctp_chunk *chunk);
+static enum sctp_ierror sctp_sf_authenticate(
+				struct net *net,
+				const struct sctp_endpoint *ep,
+				const struct sctp_association *asoc,
+				const sctp_subtype_t type,
+				struct sctp_chunk *chunk);
 
 static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_endpoint *ep,
@@ -756,7 +757,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 	 */
 	if (chunk->auth_chunk) {
 		struct sctp_chunk auth;
-		sctp_ierror_t ret;
+		enum sctp_ierror ret;
 
 		/* Make sure that we and the peer are AUTH capable */
 		if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
@@ -4077,11 +4078,12 @@ gen_shutdown:
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    struct sctp_chunk *chunk)
+static enum sctp_ierror sctp_sf_authenticate(
+				struct net *net,
+				const struct sctp_endpoint *ep,
+				const struct sctp_association *asoc,
+				const sctp_subtype_t type,
+				struct sctp_chunk *chunk)
 {
 	struct sctp_authhdr *auth_hdr;
 	struct sctp_hmac *hmac;
@@ -4159,10 +4161,10 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
-	struct sctp_authhdr *auth_hdr;
 	struct sctp_chunk *chunk = arg;
+	struct sctp_authhdr *auth_hdr;
 	struct sctp_chunk *err_chunk;
-	sctp_ierror_t error;
+	enum sctp_ierror error;
 
 	/* Make sure that the peer has AUTH capable */
 	if (!asoc->peer.auth_capable)
-- 
cgit v1.2.3


From 5210601945f5aedaf2d7f13a88436e27a39c6a8a Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 19:59:59 +0800
Subject: sctp: remove the typedef sctp_state_t

This patch is to remove the typedef sctp_state_t, and
replace with enum sctp_state in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h   |  4 ++--
 include/net/sctp/constants.h |  4 ++--
 include/net/sctp/sctp.h      |  2 +-
 include/net/sctp/sm.h        | 10 +++++-----
 include/net/sctp/structs.h   |  2 +-
 net/sctp/endpointola.c       |  2 +-
 net/sctp/primitive.c         |  2 +-
 net/sctp/sm_sideeffect.c     | 12 ++++++------
 net/sctp/sm_statetable.c     | 16 +++++++++-------
 9 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index 1d5f6ff3f440..be12ec946628 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -126,7 +126,7 @@ typedef union {
 	__u8 u8;
 	int error;
 	__be16 err;
-	sctp_state_t state;
+	enum sctp_state state;
 	sctp_event_timeout_t to;
 	struct sctp_chunk *chunk;
 	struct sctp_association *asoc;
@@ -167,7 +167,7 @@ SCTP_ARG_CONSTRUCTOR(U16,	__u16, u16)
 SCTP_ARG_CONSTRUCTOR(U8,	__u8, u8)
 SCTP_ARG_CONSTRUCTOR(ERROR,     int, error)
 SCTP_ARG_CONSTRUCTOR(PERR,      __be16, err)	/* protocol error */
-SCTP_ARG_CONSTRUCTOR(STATE,	sctp_state_t, state)
+SCTP_ARG_CONSTRUCTOR(STATE,	enum sctp_state, state)
 SCTP_ARG_CONSTRUCTOR(TO,	sctp_event_timeout_t, to)
 SCTP_ARG_CONSTRUCTOR(CHUNK,	struct sctp_chunk *, chunk)
 SCTP_ARG_CONSTRUCTOR(ASOC,	struct sctp_association *, asoc)
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 9a694653b708..db9f40b657ba 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -181,7 +181,7 @@ enum sctp_ierror {
 
 
 /* SCTP state defines for internal state machine */
-typedef enum {
+enum sctp_state {
 
 	SCTP_STATE_CLOSED		= 0,
 	SCTP_STATE_COOKIE_WAIT		= 1,
@@ -192,7 +192,7 @@ typedef enum {
 	SCTP_STATE_SHUTDOWN_RECEIVED	= 6,
 	SCTP_STATE_SHUTDOWN_ACK_SENT	= 7,
 
-} sctp_state_t;
+};
 
 #define SCTP_STATE_MAX			SCTP_STATE_SHUTDOWN_ACK_SENT
 #define SCTP_STATE_NUM_STATES		(SCTP_STATE_MAX + 1)
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 0022bc713434..24ff7931d38c 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -558,7 +558,7 @@ static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style)
 /* Is the association in this state? */
 #define sctp_state(asoc, state) __sctp_state((asoc), (SCTP_STATE_##state))
 static inline int __sctp_state(const struct sctp_association *asoc,
-			       sctp_state_t state)
+			       enum sctp_state state)
 {
 	return asoc->state == state;
 }
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 860f378333b5..281e8d1e0752 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -175,10 +175,10 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire;
 
 /* Prototypes for utility support functions.  */
 __u8 sctp_get_chunk_type(struct sctp_chunk *chunk);
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *,
-					    sctp_event_t,
-					    sctp_state_t,
-					    sctp_subtype_t);
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
+						  sctp_event_t event_type,
+						  enum sctp_state state,
+						  sctp_subtype_t event_subtype);
 int sctp_chunk_iif(const struct sctp_chunk *);
 struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *,
 					     struct sctp_chunk *,
@@ -313,7 +313,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
 /* Prototypes for statetable processing. */
 
 int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
-	       sctp_state_t state,
+	       enum sctp_state state,
                struct sctp_endpoint *,
                struct sctp_association *asoc,
                void *event_arg,
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 6fab67e7f1e5..fbe6e81b889b 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1577,7 +1577,7 @@ struct sctp_association {
 	 *
 	 *		State takes values from SCTP_STATE_*.
 	 */
-	sctp_state_t state;
+	enum sctp_state state;
 
 	/* Overall     : The overall association error count.
 	 * Error Count : [Clear this any time I get something.]
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 3d506b2f6193..4111c00a9d9d 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -383,7 +383,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
 	sctp_subtype_t subtype;
-	sctp_state_t state;
+	enum sctp_state state;
 	int error = 0;
 	int first_time = 1;	/* is this the first time through the loop */
 
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index f0553a022859..1fb5b9bb3c6d 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -54,7 +54,7 @@ int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
 			    void *arg) { \
 	int error = 0; \
 	sctp_event_t event_type; sctp_subtype_t subtype; \
-	sctp_state_t state; \
+	enum sctp_state state; \
 	struct sctp_endpoint *ep; \
 	\
 	event_type = SCTP_EVENT_T_PRIMITIVE; \
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 5dda8c42b5f6..b77a81aa907b 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -53,7 +53,7 @@
 
 static int sctp_cmd_interpreter(sctp_event_t event_type,
 				sctp_subtype_t subtype,
-				sctp_state_t state,
+				enum sctp_state state,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
 				void *event_arg,
@@ -61,7 +61,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 				sctp_cmd_seq_t *commands,
 				gfp_t gfp);
 static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
-			     sctp_state_t state,
+			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
 			     void *event_arg,
@@ -843,7 +843,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
 /* Helper function to change the state of an association. */
 static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
 			       struct sctp_association *asoc,
-			       sctp_state_t state)
+			       enum sctp_state state)
 {
 	struct sock *sk = asoc->base.sk;
 
@@ -1140,7 +1140,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
  * good place to start.
  */
 int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
-	       sctp_state_t state,
+	       enum sctp_state state,
 	       struct sctp_endpoint *ep,
 	       struct sctp_association *asoc,
 	       void *event_arg,
@@ -1179,7 +1179,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
  * This the master state function side effect processing function.
  *****************************************************************/
 static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
-			     sctp_state_t state,
+			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
 			     void *event_arg,
@@ -1265,7 +1265,7 @@ bail:
 /* This is the side-effect interpreter.  */
 static int sctp_cmd_interpreter(sctp_event_t event_type,
 				sctp_subtype_t subtype,
-				sctp_state_t state,
+				enum sctp_state state,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
 				void *event_arg,
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 3e958c1c4b95..f46c11bffea7 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -52,9 +52,10 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
 static const sctp_sm_table_entry_t
 timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    enum sctp_cid cid,
-							    sctp_state_t state);
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(
+						struct net *net,
+						enum sctp_cid cid,
+						enum sctp_state state);
 
 
 static const sctp_sm_table_entry_t bug = {
@@ -78,7 +79,7 @@ static const sctp_sm_table_entry_t bug = {
 
 const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
 						  sctp_event_t event_type,
-						  sctp_state_t state,
+						  enum sctp_state state,
 						  sctp_subtype_t event_subtype)
 {
 	switch (event_type) {
@@ -967,9 +968,10 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
 	TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
 };
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    enum sctp_cid cid,
-							    sctp_state_t state)
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(
+						struct net *net,
+						enum sctp_cid cid,
+						enum sctp_state state)
 {
 	if (state > SCTP_STATE_MAX)
 		return &bug;
-- 
cgit v1.2.3


From dc1e0e6eb8b2e2c6b5c6cdfa4f0cc789e9516de5 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 20:00:00 +0800
Subject: sctp: remove the typedef sctp_event_primitive_t

This patch is to remove the typedef sctp_event_primitive_t, and
replace with enum sctp_event_primitive in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index db9f40b657ba..162ee954a6af 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -110,7 +110,7 @@ typedef enum {
 #define SCTP_NUM_OTHER_TYPES		(SCTP_EVENT_OTHER_MAX + 1)
 
 /* These are primitive requests from the ULP.  */
-typedef enum {
+enum sctp_event_primitive {
 	SCTP_PRIMITIVE_ASSOCIATE = 0,
 	SCTP_PRIMITIVE_SHUTDOWN,
 	SCTP_PRIMITIVE_ABORT,
@@ -118,7 +118,7 @@ typedef enum {
 	SCTP_PRIMITIVE_REQUESTHEARTBEAT,
 	SCTP_PRIMITIVE_ASCONF,
 	SCTP_PRIMITIVE_RECONF,
-} sctp_event_primitive_t;
+};
 
 #define SCTP_EVENT_PRIMITIVE_MAX	SCTP_PRIMITIVE_RECONF
 #define SCTP_NUM_PRIMITIVE_TYPES	(SCTP_EVENT_PRIMITIVE_MAX + 1)
@@ -133,7 +133,7 @@ typedef union {
 	enum sctp_cid chunk;
 	sctp_event_timeout_t timeout;
 	sctp_event_other_t other;
-	sctp_event_primitive_t primitive;
+	enum sctp_event_primitive primitive;
 } sctp_subtype_t;
 
 #define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \
@@ -144,7 +144,7 @@ SCTP_ST_## _name (_type _arg)		\
 SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,		enum sctp_cid,		chunk)
 SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,	sctp_event_timeout_t,	timeout)
 SCTP_SUBTYPE_CONSTRUCTOR(OTHER,		sctp_event_other_t,	other)
-SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	sctp_event_primitive_t,	primitive)
+SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	enum sctp_event_primitive, primitive)
 
 
 #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA)
-- 
cgit v1.2.3


From a0f098d0385a40f9c4c8a2ce48d075f77ea7edd8 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 20:00:01 +0800
Subject: sctp: remove the typedef sctp_event_other_t

This patch is to remove the typedef sctp_event_other_t, and
replace with enum sctp_event_other in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 162ee954a6af..fd8a80ec573f 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -101,10 +101,10 @@ typedef enum {
 #define SCTP_EVENT_TIMEOUT_MAX		SCTP_EVENT_TIMEOUT_AUTOCLOSE
 #define SCTP_NUM_TIMEOUT_TYPES		(SCTP_EVENT_TIMEOUT_MAX + 1)
 
-typedef enum {
+enum sctp_event_other {
 	SCTP_EVENT_NO_PENDING_TSN = 0,
 	SCTP_EVENT_ICMP_PROTO_UNREACH,
-} sctp_event_other_t;
+};
 
 #define SCTP_EVENT_OTHER_MAX		SCTP_EVENT_ICMP_PROTO_UNREACH
 #define SCTP_NUM_OTHER_TYPES		(SCTP_EVENT_OTHER_MAX + 1)
@@ -132,7 +132,7 @@ enum sctp_event_primitive {
 typedef union {
 	enum sctp_cid chunk;
 	sctp_event_timeout_t timeout;
-	sctp_event_other_t other;
+	enum sctp_event_other other;
 	enum sctp_event_primitive primitive;
 } sctp_subtype_t;
 
@@ -143,7 +143,7 @@ SCTP_ST_## _name (_type _arg)		\
 
 SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,		enum sctp_cid,		chunk)
 SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,	sctp_event_timeout_t,	timeout)
-SCTP_SUBTYPE_CONSTRUCTOR(OTHER,		sctp_event_other_t,	other)
+SCTP_SUBTYPE_CONSTRUCTOR(OTHER,		enum sctp_event_other,	other)
 SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	enum sctp_event_primitive, primitive)
 
 
-- 
cgit v1.2.3


From 19cd1592a24754e16d48398812d5f69b63f674dd Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 20:00:02 +0800
Subject: sctp: remove the typedef sctp_event_timeout_t

This patch is to remove the typedef sctp_event_timeout_t, and
replace with enum sctp_event_timeout in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h   | 4 ++--
 include/net/sctp/constants.h | 8 ++++----
 net/sctp/sm_sideeffect.c     | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index be12ec946628..376cb78b6247 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -127,7 +127,7 @@ typedef union {
 	int error;
 	__be16 err;
 	enum sctp_state state;
-	sctp_event_timeout_t to;
+	enum sctp_event_timeout to;
 	struct sctp_chunk *chunk;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
@@ -168,7 +168,7 @@ SCTP_ARG_CONSTRUCTOR(U8,	__u8, u8)
 SCTP_ARG_CONSTRUCTOR(ERROR,     int, error)
 SCTP_ARG_CONSTRUCTOR(PERR,      __be16, err)	/* protocol error */
 SCTP_ARG_CONSTRUCTOR(STATE,	enum sctp_state, state)
-SCTP_ARG_CONSTRUCTOR(TO,	sctp_event_timeout_t, to)
+SCTP_ARG_CONSTRUCTOR(TO,	enum sctp_event_timeout, to)
 SCTP_ARG_CONSTRUCTOR(CHUNK,	struct sctp_chunk *, chunk)
 SCTP_ARG_CONSTRUCTOR(ASOC,	struct sctp_association *, asoc)
 SCTP_ARG_CONSTRUCTOR(TRANSPORT,	struct sctp_transport *, transport)
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index fd8a80ec573f..fb931f07e83a 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -84,7 +84,7 @@ typedef enum {
  * SCTP_ULP_* to the list of possible chunks.
  */
 
-typedef enum {
+enum sctp_event_timeout {
 	SCTP_EVENT_TIMEOUT_NONE = 0,
 	SCTP_EVENT_TIMEOUT_T1_COOKIE,
 	SCTP_EVENT_TIMEOUT_T1_INIT,
@@ -96,7 +96,7 @@ typedef enum {
 	SCTP_EVENT_TIMEOUT_RECONF,
 	SCTP_EVENT_TIMEOUT_SACK,
 	SCTP_EVENT_TIMEOUT_AUTOCLOSE,
-} sctp_event_timeout_t;
+};
 
 #define SCTP_EVENT_TIMEOUT_MAX		SCTP_EVENT_TIMEOUT_AUTOCLOSE
 #define SCTP_NUM_TIMEOUT_TYPES		(SCTP_EVENT_TIMEOUT_MAX + 1)
@@ -131,7 +131,7 @@ enum sctp_event_primitive {
 
 typedef union {
 	enum sctp_cid chunk;
-	sctp_event_timeout_t timeout;
+	enum sctp_event_timeout timeout;
 	enum sctp_event_other other;
 	enum sctp_event_primitive primitive;
 } sctp_subtype_t;
@@ -142,7 +142,7 @@ SCTP_ST_## _name (_type _arg)		\
 { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; }
 
 SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,		enum sctp_cid,		chunk)
-SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,	sctp_event_timeout_t,	timeout)
+SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,	enum sctp_event_timeout, timeout)
 SCTP_SUBTYPE_CONSTRUCTOR(OTHER,		enum sctp_event_other,	other)
 SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	enum sctp_event_primitive, primitive)
 
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b77a81aa907b..11a344896b71 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -280,7 +280,7 @@ out_unlock:
  * for timeouts which use the association as their parameter.
  */
 static void sctp_generate_timeout_event(struct sctp_association *asoc,
-					sctp_event_timeout_t timeout_type)
+					enum sctp_event_timeout timeout_type)
 {
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
@@ -1052,8 +1052,8 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
 
 
 static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
-				    sctp_event_timeout_t timer,
-				    char *name)
+				     enum sctp_event_timeout timer,
+				     char *name)
 {
 	struct sctp_transport *t;
 
-- 
cgit v1.2.3


From 61f0eb072294a148f707335d4d7f858b2af73770 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 20:00:03 +0800
Subject: sctp: remove the typedef sctp_event_t

This patch is to remove the typedef sctp_event_t, and
replace with enum sctp_event in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h |  6 ++----
 include/net/sctp/sm.h        | 12 +++++-------
 net/sctp/primitive.c         |  2 +-
 net/sctp/sm_sideeffect.c     | 20 +++++++++-----------
 net/sctp/sm_statetable.c     |  2 +-
 5 files changed, 18 insertions(+), 24 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index fb931f07e83a..3181e0f95b60 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -71,14 +71,12 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM };
 					 SCTP_NUM_AUTH_CHUNK_TYPES)
 
 /* These are the different flavours of event.  */
-typedef enum {
-
+enum sctp_event {
 	SCTP_EVENT_T_CHUNK = 1,
 	SCTP_EVENT_T_TIMEOUT,
 	SCTP_EVENT_T_OTHER,
 	SCTP_EVENT_T_PRIMITIVE
-
-} sctp_event_t;
+};
 
 /* As a convenience for the state machine, we append SCTP_EVENT_* and
  * SCTP_ULP_* to the list of possible chunks.
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 281e8d1e0752..96f54cff7964 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -176,7 +176,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire;
 /* Prototypes for utility support functions.  */
 __u8 sctp_get_chunk_type(struct sctp_chunk *chunk);
 const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
-						  sctp_event_t event_type,
+						  enum sctp_event event_type,
 						  enum sctp_state state,
 						  sctp_subtype_t event_subtype);
 int sctp_chunk_iif(const struct sctp_chunk *);
@@ -312,12 +312,10 @@ struct sctp_chunk *sctp_process_strreset_resp(
 
 /* Prototypes for statetable processing. */
 
-int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
-	       enum sctp_state state,
-               struct sctp_endpoint *,
-               struct sctp_association *asoc,
-               void *event_arg,
-	       gfp_t gfp);
+int sctp_do_sm(struct net *net, enum sctp_event event_type,
+	       sctp_subtype_t subtype, enum sctp_state state,
+	       struct sctp_endpoint *ep, struct sctp_association *asoc,
+	       void *event_arg, gfp_t gfp);
 
 /* 2nd level prototypes */
 void sctp_generate_t3_rtx_event(unsigned long peer);
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index 1fb5b9bb3c6d..c914166984b3 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -53,7 +53,7 @@
 int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
 			    void *arg) { \
 	int error = 0; \
-	sctp_event_t event_type; sctp_subtype_t subtype; \
+	enum sctp_event event_type; sctp_subtype_t subtype; \
 	enum sctp_state state; \
 	struct sctp_endpoint *ep; \
 	\
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 11a344896b71..b545c768cb9e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -51,7 +51,7 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
-static int sctp_cmd_interpreter(sctp_event_t event_type,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
 				sctp_subtype_t subtype,
 				enum sctp_state state,
 				struct sctp_endpoint *ep,
@@ -60,7 +60,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 				sctp_disposition_t status,
 				sctp_cmd_seq_t *commands,
 				gfp_t gfp);
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
+static int sctp_side_effects(enum sctp_event event_type, sctp_subtype_t subtype,
 			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
@@ -602,7 +602,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
 /* Worker routine to handle SCTP_CMD_ASSOC_FAILED.  */
 static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
 				  struct sctp_association *asoc,
-				  sctp_event_t event_type,
+				  enum sctp_event event_type,
 				  sctp_subtype_t subtype,
 				  struct sctp_chunk *chunk,
 				  unsigned int error)
@@ -1139,12 +1139,10 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
  * If you want to understand all of lksctp, this is a
  * good place to start.
  */
-int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
-	       enum sctp_state state,
-	       struct sctp_endpoint *ep,
-	       struct sctp_association *asoc,
-	       void *event_arg,
-	       gfp_t gfp)
+int sctp_do_sm(struct net *net, enum sctp_event event_type,
+	       sctp_subtype_t subtype, enum sctp_state state,
+	       struct sctp_endpoint *ep, struct sctp_association *asoc,
+	       void *event_arg, gfp_t gfp)
 {
 	sctp_cmd_seq_t commands;
 	const sctp_sm_table_entry_t *state_fn;
@@ -1178,7 +1176,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
 /*****************************************************************
  * This the master state function side effect processing function.
  *****************************************************************/
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
+static int sctp_side_effects(enum sctp_event event_type, sctp_subtype_t subtype,
 			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
@@ -1263,7 +1261,7 @@ bail:
  ********************************************************************/
 
 /* This is the side-effect interpreter.  */
-static int sctp_cmd_interpreter(sctp_event_t event_type,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
 				sctp_subtype_t subtype,
 				enum sctp_state state,
 				struct sctp_endpoint *ep,
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index f46c11bffea7..f7cdb7014244 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -78,7 +78,7 @@ static const sctp_sm_table_entry_t bug = {
 })
 
 const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
-						  sctp_event_t event_type,
+						  enum sctp_event event_type,
 						  enum sctp_state state,
 						  sctp_subtype_t event_subtype)
 {
-- 
cgit v1.2.3


From bfc6f8270fefb323662d1d7713f940149f27b7f1 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 5 Aug 2017 20:00:04 +0800
Subject: sctp: remove the typedef sctp_subtype_t

This patch is to remove the typedef sctp_subtype_t, and
replace with union sctp_subtype in the places where it's
using this typedef.

Note that it doesn't fix many indents although it should,
as sctp_disposition_t's removal would mess them up again.
So better to fix them when removing sctp_disposition_t in
later patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h |  18 ++--
 include/net/sctp/sm.h        |  13 +--
 net/sctp/associola.c         |   2 +-
 net/sctp/debug.c             |   8 +-
 net/sctp/endpointola.c       |   2 +-
 net/sctp/primitive.c         |   2 +-
 net/sctp/probe.c             |   2 +-
 net/sctp/sm_sideeffect.c     |  16 ++--
 net/sctp/sm_statefuns.c      | 191 ++++++++++++++++++++++---------------------
 net/sctp/sm_statetable.c     |   9 +-
 10 files changed, 134 insertions(+), 129 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 3181e0f95b60..deaafa9b09cb 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -124,20 +124,20 @@ enum sctp_event_primitive {
 /* We define here a utility type for manipulating subtypes.
  * The subtype constructors all work like this:
  *
- * 	sctp_subtype_t foo = SCTP_ST_CHUNK(SCTP_CID_INIT);
+ *   union sctp_subtype foo = SCTP_ST_CHUNK(SCTP_CID_INIT);
  */
 
-typedef union {
+union sctp_subtype {
 	enum sctp_cid chunk;
 	enum sctp_event_timeout timeout;
 	enum sctp_event_other other;
 	enum sctp_event_primitive primitive;
-} sctp_subtype_t;
+};
 
 #define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \
-static inline sctp_subtype_t	\
+static inline union sctp_subtype	\
 SCTP_ST_## _name (_type _arg)		\
-{ sctp_subtype_t _retval; _retval._elt = _arg; return _retval; }
+{ union sctp_subtype _retval; _retval._elt = _arg; return _retval; }
 
 SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,		enum sctp_cid,		chunk)
 SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,	enum sctp_event_timeout, timeout)
@@ -220,10 +220,10 @@ enum sctp_sock_state {
 };
 
 /* These functions map various type to printable names.  */
-const char *sctp_cname(const sctp_subtype_t);	/* chunk types */
-const char *sctp_oname(const sctp_subtype_t);	/* other events */
-const char *sctp_tname(const sctp_subtype_t);	/* timeouts */
-const char *sctp_pname(const sctp_subtype_t);	/* primitives */
+const char *sctp_cname(const union sctp_subtype id);	/* chunk types */
+const char *sctp_oname(const union sctp_subtype id);	/* other events */
+const char *sctp_tname(const union sctp_subtype id);	/* timeouts */
+const char *sctp_pname(const union sctp_subtype id);	/* primitives */
 
 /* This is a table of printable names of sctp_state_t's.  */
 extern const char *const sctp_state_tbl[];
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 96f54cff7964..1e7651c3b158 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -73,7 +73,7 @@ typedef struct {
 typedef sctp_disposition_t (sctp_state_fn_t) (struct net *,
 					      const struct sctp_endpoint *,
 					      const struct sctp_association *,
-					      const sctp_subtype_t type,
+					      const union sctp_subtype type,
 					      void *arg,
 					      sctp_cmd_seq_t *);
 typedef void (sctp_timer_event_t) (unsigned long);
@@ -175,10 +175,11 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire;
 
 /* Prototypes for utility support functions.  */
 __u8 sctp_get_chunk_type(struct sctp_chunk *chunk);
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
-						  enum sctp_event event_type,
-						  enum sctp_state state,
-						  sctp_subtype_t event_subtype);
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(
+					struct net *net,
+					enum sctp_event event_type,
+					enum sctp_state state,
+					union sctp_subtype event_subtype);
 int sctp_chunk_iif(const struct sctp_chunk *);
 struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *,
 					     struct sctp_chunk *,
@@ -313,7 +314,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
 /* Prototypes for statetable processing. */
 
 int sctp_do_sm(struct net *net, enum sctp_event event_type,
-	       sctp_subtype_t subtype, enum sctp_state state,
+	       union sctp_subtype subtype, enum sctp_state state,
 	       struct sctp_endpoint *ep, struct sctp_association *asoc,
 	       void *event_arg, gfp_t gfp);
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b53efed8ff71..dfb9651e818b 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1021,11 +1021,11 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 		container_of(work, struct sctp_association,
 			     base.inqueue.immediate);
 	struct net *net = sock_net(asoc->base.sk);
+	union sctp_subtype subtype;
 	struct sctp_endpoint *ep;
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
 	int state;
-	sctp_subtype_t subtype;
 	int error = 0;
 
 	/* The association should be held so we should be safe. */
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 2e47eb2f05cb..3f619fdcbf0a 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -60,7 +60,7 @@ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = {
 };
 
 /* Lookup "chunk type" debug name. */
-const char *sctp_cname(const sctp_subtype_t cid)
+const char *sctp_cname(const union sctp_subtype cid)
 {
 	if (cid.chunk <= SCTP_CID_BASE_MAX)
 		return sctp_cid_tbl[cid.chunk];
@@ -130,7 +130,7 @@ static const char *const sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = {
 };
 
 /* Lookup primitive debug name. */
-const char *sctp_pname(const sctp_subtype_t id)
+const char *sctp_pname(const union sctp_subtype id)
 {
 	if (id.primitive <= SCTP_EVENT_PRIMITIVE_MAX)
 		return sctp_primitive_tbl[id.primitive];
@@ -143,7 +143,7 @@ static const char *const sctp_other_tbl[] = {
 };
 
 /* Lookup "other" debug name. */
-const char *sctp_oname(const sctp_subtype_t id)
+const char *sctp_oname(const union sctp_subtype id)
 {
 	if (id.other <= SCTP_EVENT_OTHER_MAX)
 		return sctp_other_tbl[id.other];
@@ -165,7 +165,7 @@ static const char *const sctp_timer_tbl[] = {
 };
 
 /* Lookup timer debug name. */
-const char *sctp_tname(const sctp_subtype_t id)
+const char *sctp_tname(const union sctp_subtype id)
 {
 	BUILD_BUG_ON(SCTP_EVENT_TIMEOUT_MAX + 1 != ARRAY_SIZE(sctp_timer_tbl));
 
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 4111c00a9d9d..ee1e601a0b11 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -382,7 +382,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
 	struct sctp_transport *transport;
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
-	sctp_subtype_t subtype;
+	union sctp_subtype subtype;
 	enum sctp_state state;
 	int error = 0;
 	int first_time = 1;	/* is this the first time through the loop */
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index c914166984b3..c0817f7a8964 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -53,7 +53,7 @@
 int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
 			    void *arg) { \
 	int error = 0; \
-	enum sctp_event event_type; sctp_subtype_t subtype; \
+	enum sctp_event event_type; union sctp_subtype subtype; \
 	enum sctp_state state; \
 	struct sctp_endpoint *ep; \
 	\
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 6cc2152e0740..43837dfc86a7 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -130,7 +130,7 @@ static const struct file_operations sctpprobe_fops = {
 static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
 					    const struct sctp_endpoint *ep,
 					    const struct sctp_association *asoc,
-					    const sctp_subtype_t type,
+					    const union sctp_subtype type,
 					    void *arg,
 					    sctp_cmd_seq_t *commands)
 {
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b545c768cb9e..4a12d29d9aa1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -52,7 +52,7 @@
 #include <net/sctp/sm.h>
 
 static int sctp_cmd_interpreter(enum sctp_event event_type,
-				sctp_subtype_t subtype,
+				union sctp_subtype subtype,
 				enum sctp_state state,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
@@ -60,7 +60,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
 				sctp_disposition_t status,
 				sctp_cmd_seq_t *commands,
 				gfp_t gfp);
-static int sctp_side_effects(enum sctp_event event_type, sctp_subtype_t subtype,
+static int sctp_side_effects(enum sctp_event event_type,
+			     union sctp_subtype subtype,
 			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
@@ -603,7 +604,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
 static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
 				  struct sctp_association *asoc,
 				  enum sctp_event event_type,
-				  sctp_subtype_t subtype,
+				  union sctp_subtype subtype,
 				  struct sctp_chunk *chunk,
 				  unsigned int error)
 {
@@ -1140,7 +1141,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
  * good place to start.
  */
 int sctp_do_sm(struct net *net, enum sctp_event event_type,
-	       sctp_subtype_t subtype, enum sctp_state state,
+	       union sctp_subtype subtype, enum sctp_state state,
 	       struct sctp_endpoint *ep, struct sctp_association *asoc,
 	       void *event_arg, gfp_t gfp)
 {
@@ -1148,7 +1149,7 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type,
 	const sctp_sm_table_entry_t *state_fn;
 	sctp_disposition_t status;
 	int error = 0;
-	typedef const char *(printfn_t)(sctp_subtype_t);
+	typedef const char *(printfn_t)(union sctp_subtype);
 	static printfn_t *table[] = {
 		NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname,
 	};
@@ -1176,7 +1177,8 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type,
 /*****************************************************************
  * This the master state function side effect processing function.
  *****************************************************************/
-static int sctp_side_effects(enum sctp_event event_type, sctp_subtype_t subtype,
+static int sctp_side_effects(enum sctp_event event_type,
+			     union sctp_subtype subtype,
 			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
@@ -1262,7 +1264,7 @@ bail:
 
 /* This is the side-effect interpreter.  */
 static int sctp_cmd_interpreter(enum sctp_event event_type,
-				sctp_subtype_t subtype,
+				union sctp_subtype subtype,
 				enum sctp_state state,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 5381697333df..ac6aaa046529 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -80,19 +80,19 @@ static void sctp_send_stale_cookie_err(struct net *net,
 static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 						 const struct sctp_endpoint *ep,
 						 const struct sctp_association *asoc,
-						 const sctp_subtype_t type,
+						 const union sctp_subtype type,
 						 void *arg,
 						 sctp_cmd_seq_t *commands);
 static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 					     const struct sctp_endpoint *ep,
 					     const struct sctp_association *asoc,
-					     const sctp_subtype_t type,
+					     const union sctp_subtype type,
 					     void *arg,
 					     sctp_cmd_seq_t *commands);
 static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands);
 static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
@@ -116,7 +116,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands);
 
@@ -124,7 +124,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg, void *ext,
 				     sctp_cmd_seq_t *commands);
 
@@ -132,7 +132,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands);
 
@@ -140,7 +140,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands);
 
@@ -148,13 +148,13 @@ static enum sctp_ierror sctp_sf_authenticate(
 				struct net *net,
 				const struct sctp_endpoint *ep,
 				const struct sctp_association *asoc,
-				const sctp_subtype_t type,
+				const union sctp_subtype type,
 				struct sctp_chunk *chunk);
 
 static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands);
 
@@ -217,7 +217,7 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
 sctp_disposition_t sctp_sf_do_4_C(struct net *net,
 				  const struct sctp_endpoint *ep,
 				  const struct sctp_association *asoc,
-				  const sctp_subtype_t type,
+				  const union sctp_subtype type,
 				  void *arg,
 				  sctp_cmd_seq_t *commands)
 {
@@ -303,7 +303,7 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
 sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -497,7 +497,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -647,7 +647,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type, void *arg,
+				      const union sctp_subtype type, void *arg,
 				      sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
@@ -874,7 +874,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type, void *arg,
+				      const union sctp_subtype type, void *arg,
 				      sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
@@ -951,7 +951,7 @@ nomem:
 /* Generate and sendout a heartbeat packet.  */
 static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
 					    const struct sctp_association *asoc,
-					    const sctp_subtype_t type,
+					    const union sctp_subtype type,
 					    void *arg,
 					    sctp_cmd_seq_t *commands)
 {
@@ -977,7 +977,7 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
 sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -1025,7 +1025,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
 sctp_disposition_t sctp_sf_send_reconf(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type, void *arg,
+				       const union sctp_subtype type, void *arg,
 				       sctp_cmd_seq_t *commands)
 {
 	struct sctp_transport *transport = arg;
@@ -1076,7 +1076,7 @@ sctp_disposition_t sctp_sf_send_reconf(struct net *net,
 sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -1151,7 +1151,7 @@ nomem:
 sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -1416,7 +1416,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg, sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg, *repl, *err_chunk;
@@ -1627,7 +1627,7 @@ cleanup:
 sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -1681,7 +1681,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
 sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -1704,7 +1704,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
 sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
 					    const struct sctp_endpoint *ep,
 					    const struct sctp_association *asoc,
-					    const sctp_subtype_t type,
+					    const union sctp_subtype type,
 					    void *arg, sctp_cmd_seq_t *commands)
 {
 	/* Per the above section, we'll discard the chunk if we have an
@@ -2027,7 +2027,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2146,7 +2146,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -2188,7 +2188,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2239,7 +2239,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -2266,7 +2266,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
 sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2330,7 +2330,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
 static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 						 const struct sctp_endpoint *ep,
 						 const struct sctp_association *asoc,
-						 const sctp_subtype_t type,
+						 const union sctp_subtype type,
 						 void *arg,
 						 sctp_cmd_seq_t *commands)
 {
@@ -2452,7 +2452,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2489,7 +2489,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
 static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2527,7 +2527,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -2566,7 +2566,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -2581,7 +2581,7 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
 sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
 					       const struct sctp_endpoint *ep,
 					       const struct sctp_association *asoc,
-					       const sctp_subtype_t type,
+					       const union sctp_subtype type,
 					       void *arg,
 					       sctp_cmd_seq_t *commands)
 {
@@ -2653,7 +2653,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
 sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -2742,7 +2742,7 @@ out:
 sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -2795,7 +2795,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
 sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -2859,7 +2859,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type,
+				      const union sctp_subtype type,
 				      void *arg,
 				      sctp_cmd_seq_t *commands)
 {
@@ -2915,7 +2915,7 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
 sctp_disposition_t sctp_sf_do_ecne(struct net *net,
 				   const struct sctp_endpoint *ep,
 				   const struct sctp_association *asoc,
-				   const sctp_subtype_t type,
+				   const union sctp_subtype type,
 				   void *arg,
 				   sctp_cmd_seq_t *commands)
 {
@@ -2972,7 +2972,7 @@ sctp_disposition_t sctp_sf_do_ecne(struct net *net,
 sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -3092,7 +3092,7 @@ discard_noforce:
 sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -3183,7 +3183,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
 sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -3257,7 +3257,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
 static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -3307,7 +3307,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
 sctp_disposition_t sctp_sf_operr_notify(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -3345,7 +3345,7 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net,
 sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -3428,7 +3428,7 @@ nomem:
 sctp_disposition_t sctp_sf_ootb(struct net *net,
 				const struct sctp_endpoint *ep,
 				const struct sctp_association *asoc,
-				const sctp_subtype_t type,
+				const union sctp_subtype type,
 				void *arg,
 				sctp_cmd_seq_t *commands)
 {
@@ -3521,7 +3521,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 					     const struct sctp_endpoint *ep,
 					     const struct sctp_association *asoc,
-					     const sctp_subtype_t type,
+					     const union sctp_subtype type,
 					     void *arg,
 					     sctp_cmd_seq_t *commands)
 {
@@ -3583,7 +3583,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type,
+				      const union sctp_subtype type,
 				      void *arg,
 				      sctp_cmd_seq_t *commands)
 {
@@ -3608,7 +3608,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
 sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type, void *arg,
+				     const union sctp_subtype type, void *arg,
 				     sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk	*chunk = arg;
@@ -3725,7 +3725,8 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 					 const struct sctp_endpoint *ep,
 					 const struct sctp_association *asoc,
-					 const sctp_subtype_t type, void *arg,
+					 const union sctp_subtype type,
+					 void *arg,
 					 sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk	*asconf_ack = arg;
@@ -3843,7 +3844,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 sctp_disposition_t sctp_sf_do_reconf(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type, void *arg,
+				     const union sctp_subtype type, void *arg,
 				     sctp_cmd_seq_t *commands)
 {
 	struct sctp_paramhdr *err_param = NULL;
@@ -3919,7 +3920,7 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
 sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -3990,7 +3991,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -4082,7 +4083,7 @@ static enum sctp_ierror sctp_sf_authenticate(
 				struct net *net,
 				const struct sctp_endpoint *ep,
 				const struct sctp_association *asoc,
-				const sctp_subtype_t type,
+				const union sctp_subtype type,
 				struct sctp_chunk *chunk)
 {
 	struct sctp_authhdr *auth_hdr;
@@ -4157,7 +4158,7 @@ nomem:
 sctp_disposition_t sctp_sf_eat_auth(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -4254,7 +4255,7 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net,
 sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4334,7 +4335,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
 					 const struct sctp_endpoint *ep,
 					 const struct sctp_association *asoc,
-					 const sctp_subtype_t type,
+					 const union sctp_subtype type,
 					 void *arg,
 					 sctp_cmd_seq_t *commands)
 {
@@ -4374,7 +4375,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
 sctp_disposition_t sctp_sf_pdiscard(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -4402,7 +4403,7 @@ sctp_disposition_t sctp_sf_pdiscard(struct net *net,
 sctp_disposition_t sctp_sf_violation(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4540,7 +4541,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4560,7 +4561,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg, void *ext,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4603,7 +4604,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4623,7 +4624,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
+				     const union sctp_subtype type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands)
 {
@@ -4698,7 +4699,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -4810,7 +4811,7 @@ nomem:
 sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -4850,7 +4851,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -4906,7 +4907,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -4943,7 +4944,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 sctp_disposition_t sctp_sf_error_closed(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -4957,7 +4958,7 @@ sctp_disposition_t sctp_sf_error_closed(struct net *net,
 sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
 					  const struct sctp_endpoint *ep,
 					  const struct sctp_association *asoc,
-					  const sctp_subtype_t type,
+					  const union sctp_subtype type,
 					  void *arg,
 					  sctp_cmd_seq_t *commands)
 {
@@ -4984,7 +4985,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5019,7 +5020,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg, sctp_cmd_seq_t *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
@@ -5046,7 +5047,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5095,7 +5096,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5121,7 +5122,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5148,7 +5149,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5179,7 +5180,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5215,7 +5216,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
 					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -5247,7 +5248,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
 sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -5264,7 +5265,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
 sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
 					 const struct sctp_endpoint *ep,
 					 const struct sctp_association *asoc,
-					 const sctp_subtype_t type,
+					 const union sctp_subtype type,
 					 void *arg, sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
@@ -5282,7 +5283,7 @@ sctp_disposition_t sctp_sf_ignore_primitive(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5306,7 +5307,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5338,7 +5339,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5408,7 +5409,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5481,7 +5482,7 @@ nomem:
 sctp_disposition_t sctp_sf_ignore_other(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -5509,7 +5510,7 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net,
 sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
@@ -5597,7 +5598,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
 sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
+				       const union sctp_subtype type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
@@ -5628,7 +5629,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
 sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -5692,7 +5693,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
 sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -5742,7 +5743,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
 sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -5813,7 +5814,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5884,7 +5885,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
 					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
+					   const union sctp_subtype type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
@@ -5921,7 +5922,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
+	const union sctp_subtype type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
@@ -5963,7 +5964,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 sctp_disposition_t sctp_sf_not_impl(struct net *net,
 				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
+				    const union sctp_subtype type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
@@ -5981,7 +5982,7 @@ sctp_disposition_t sctp_sf_not_impl(struct net *net,
 sctp_disposition_t sctp_sf_bug(struct net *net,
 			       const struct sctp_endpoint *ep,
 			       const struct sctp_association *asoc,
-			       const sctp_subtype_t type,
+			       const union sctp_subtype type,
 			       void *arg,
 			       sctp_cmd_seq_t *commands)
 {
@@ -6002,7 +6003,7 @@ sctp_disposition_t sctp_sf_bug(struct net *net,
 sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index f7cdb7014244..d437f3801399 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -77,10 +77,11 @@ static const sctp_sm_table_entry_t bug = {
 	rtn;								\
 })
 
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
-						  enum sctp_event event_type,
-						  enum sctp_state state,
-						  sctp_subtype_t event_subtype)
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(
+					struct net *net,
+					enum sctp_event event_type,
+					enum sctp_state state,
+					union sctp_subtype event_subtype)
 {
 	switch (event_type) {
 	case SCTP_EVENT_T_CHUNK:
-- 
cgit v1.2.3


From 3e0e82664322290a59189f7c2bcb39b0de932505 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 7 Aug 2017 10:15:19 +0200
Subject: net: sched: make egress_dev flag part of flower offload struct

Since this is specific to flower now, make it part of the flower offload
struct.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +-
 include/linux/netdevice.h                        | 1 -
 include/net/pkt_cls.h                            | 1 +
 net/sched/cls_flower.c                           | 2 +-
 4 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index d44049ed5371..0e6bab182071 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -661,7 +661,7 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev,
 	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
 		return -EOPNOTSUPP;
 
-	if (tc->egress_dev) {
+	if (type == TC_SETUP_CLSFLOWER && tc->cls_flower->egress_dev) {
 		struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 		struct net_device *uplink_dev = mlx5_eswitch_get_uplink_netdev(esw);
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8051a36f900..bd49dbaee84e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -792,7 +792,6 @@ struct tc_to_netdev {
 		struct tc_cls_bpf_offload *cls_bpf;
 		struct tc_mqprio_qopt *mqprio;
 	};
-	bool egress_dev;
 };
 
 /* These structures hold the attributes of xdp state that are being passed
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e0c54f111467..8213acdfdf5a 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -504,6 +504,7 @@ struct tc_cls_flower_offload {
 	struct fl_flow_key *mask;
 	struct fl_flow_key *key;
 	struct tcf_exts *exts;
+	bool egress_dev;
 };
 
 enum tc_matchall_command {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index ddeed17d2024..52deeed2b7f5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -259,7 +259,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 			return tc_skip_sw(f->flags) ? -EINVAL : 0;
 		}
 		dev = f->hw_dev;
-		tc->egress_dev = true;
+		offload.egress_dev = true;
 	} else {
 		f->hw_dev = dev;
 	}
-- 
cgit v1.2.3


From 5fd9fc4e207dba0c05cafe78417952b4c4ca02dc Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 7 Aug 2017 10:15:29 +0200
Subject: net: sched: push cls related args into cls_common structure

As ndo_setup_tc is generic offload op for whole tc subsystem, does not
really make sense to have cls-specific args. So move them under
cls_common structurure which is embedded in all cls structs.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c           |  1 -
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  1 -
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h    |  1 -
 drivers/net/ethernet/broadcom/bnxt/bnxt.c          |  1 -
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    | 14 +++++-------
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c  |  7 +++---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h  |  6 ++----
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c     |  1 -
 .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c |  1 -
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c    |  1 -
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      | 17 ++++++---------
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c     |  1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 11 ++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 15 +++++--------
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h    |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     | 25 ++++++++--------------
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  2 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  |  2 +-
 drivers/net/ethernet/netronome/nfp/bpf/main.c      | 10 +++++----
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  3 +--
 .../net/ethernet/netronome/nfp/flower/offload.c    | 12 ++++++-----
 drivers/net/ethernet/netronome/nfp/nfp_app.h       |  6 ++----
 drivers/net/ethernet/netronome/nfp/nfp_port.c      |  6 +-----
 drivers/net/ethernet/netronome/nfp/nfp_port.h      |  1 -
 drivers/net/ethernet/sfc/efx.h                     |  1 -
 drivers/net/ethernet/sfc/falcon/efx.h              |  1 -
 drivers/net/ethernet/sfc/falcon/tx.c               |  1 -
 drivers/net/ethernet/sfc/tx.c                      |  1 -
 drivers/net/ethernet/ti/netcp_core.c               |  1 -
 include/linux/netdevice.h                          |  3 ---
 include/net/pkt_cls.h                              | 19 ++++++++++++++++
 net/dsa/slave.c                                    | 14 +++++-------
 net/sched/cls_bpf.c                                |  7 ++----
 net/sched/cls_flower.c                             | 13 ++++++-----
 net/sched/cls_matchall.c                           |  8 +++----
 net/sched/cls_u32.c                                | 20 +++++++----------
 net/sched/sch_mqprio.c                             |  6 ++----
 39 files changed, 101 insertions(+), 144 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 6a6ea3bdd056..bbb7bfe0be7f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1919,7 +1919,6 @@ static void xgbe_poll_controller(struct net_device *netdev)
 #endif /* End CONFIG_NET_POLL_CONTROLLER */
 
 static int xgbe_setup_tc(struct net_device *netdev, enum tc_setup_type type,
-			 u32 handle, u32 chain_index, __be16 proto,
 			 struct tc_to_netdev *tc_to_netdev)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 4395d1cac86f..257cf4be0162 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -4285,7 +4285,6 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
 }
 
 int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type,
-		     u32 handle, u32 chain_index, __be16 proto,
 		     struct tc_to_netdev *tc)
 {
 	if (type != TC_SETUP_MQPRIO)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 1ac4eb0d3413..04eb95043023 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -487,7 +487,6 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev);
 /* setup_tc callback */
 int bnx2x_setup_tc(struct net_device *dev, u8 num_tc);
 int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type,
-		     u32 handle, u32 chain_index, __be16 proto,
 		     struct tc_to_netdev *tc);
 
 int bnx2x_get_vf_config(struct net_device *dev, int vf,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b98d9f33d9af..1545b88c545d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7238,7 +7238,6 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 }
 
 static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			 u32 handle, u32 chain_index, __be16 proto,
 			 struct tc_to_netdev *ntc)
 {
 	if (type != TC_SETUP_MQPRIO)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 651229070113..13199317c8e0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2890,27 +2890,24 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 }
 
 static int cxgb_setup_tc_cls_u32(struct net_device *dev,
-				 enum tc_setup_type type,
-				 u32 handle, u32 chain_index, __be16 proto,
 				 struct tc_cls_u32_offload *cls_u32)
 {
-	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS) ||
-	    chain_index)
+	if (TC_H_MAJ(cls_u32->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_u32->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (cls_u32->command) {
 	case TC_CLSU32_NEW_KNODE:
 	case TC_CLSU32_REPLACE_KNODE:
-		return cxgb4_config_knode(dev, proto, cls_u32);
+		return cxgb4_config_knode(dev, cls_u32);
 	case TC_CLSU32_DELETE_KNODE:
-		return cxgb4_delete_knode(dev, proto, cls_u32);
+		return cxgb4_delete_knode(dev, cls_u32);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
 static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			 u32 handle, u32 chain_index, __be16 proto,
 			 struct tc_to_netdev *tc)
 {
 	struct port_info *pi = netdev2pinfo(dev);
@@ -2925,8 +2922,7 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
 
 	switch (type) {
 	case TC_SETUP_CLSU32:
-		return cxgb_setup_tc_cls_u32(dev, type, handle, chain_index,
-					     proto, tc->cls_u32);
+		return cxgb_setup_tc_cls_u32(dev, tc->cls_u32);
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index 6f734c52ef25..48970ba08bdc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -146,11 +146,11 @@ static int fill_action_fields(struct adapter *adap,
 	return 0;
 }
 
-int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
-		       struct tc_cls_u32_offload *cls)
+int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 {
 	const struct cxgb4_match_field *start, *link_start = NULL;
 	struct adapter *adapter = netdev2adap(dev);
+	__be16 protocol = cls->common.protocol;
 	struct ch_filter_specification fs;
 	struct cxgb4_tc_u32_table *t;
 	struct cxgb4_link *link;
@@ -338,8 +338,7 @@ out:
 	return ret;
 }
 
-int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
-		       struct tc_cls_u32_offload *cls)
+int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 {
 	struct adapter *adapter = netdev2adap(dev);
 	unsigned int filter_id, max_tids, i, j;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
index 021261a41c13..70a07b7cca56 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
@@ -44,10 +44,8 @@ static inline bool can_tc_u32_offload(struct net_device *dev)
 	return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false;
 }
 
-int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
-		       struct tc_cls_u32_offload *cls);
-int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
-		       struct tc_cls_u32_offload *cls);
+int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls);
+int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls);
 
 void cxgb4_cleanup_tc_u32(struct adapter *adapter);
 struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index d86d766777c8..3827608cec29 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -343,7 +343,6 @@ static void dpaa_get_stats64(struct net_device *net_dev,
 }
 
 static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
-			 u32 handle, u32 chain_index, __be16 proto,
 			 struct tc_to_netdev *tc)
 {
 	struct dpaa_priv *priv = netdev_priv(net_dev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index ef5795923b0c..dc64d751db24 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -1220,7 +1220,6 @@ static int hns3_setup_tc(struct net_device *netdev, u8 tc)
 }
 
 static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			     u32 handle, u32 chain_index, __be16 protocol,
 			     struct tc_to_netdev *tc)
 {
 	if (type != TC_SETUP_MQPRIO)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index b30190639e78..71004b8eff95 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1266,7 +1266,6 @@ err_queueing_scheme:
 }
 
 static int __fm10k_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			    u32 handle, u32 chain_index, __be16 proto,
 			    struct tc_to_netdev *tc)
 {
 	if (type != TC_SETUP_MQPRIO)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 7d47a718f922..97d8bb2e8320 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -5657,7 +5657,6 @@ exit:
 }
 
 static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
-			   u32 handle, u32 chain_index, __be16 proto,
 			   struct tc_to_netdev *tc)
 {
 	if (type != TC_SETUP_MQPRIO)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 35db198199b0..0a350314d76b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8851,7 +8851,6 @@ static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter,
 }
 
 static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter,
-					    __be16 protocol,
 					    struct tc_cls_u32_offload *cls)
 {
 	u32 uhtid = TC_U32_USERHTID(cls->hnode.handle);
@@ -9037,9 +9036,9 @@ static int ixgbe_clsu32_build_input(struct ixgbe_fdir_filter *input,
 }
 
 static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
-				  __be16 protocol,
 				  struct tc_cls_u32_offload *cls)
 {
+	__be16 protocol = cls->common.protocol;
 	u32 loc = cls->knode.handle & 0xfffff;
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbe_mat_field *field_ptr;
@@ -9227,25 +9226,23 @@ free_jump:
 }
 
 static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
-				  u32 handle, u32 chain_index, __be16 proto,
 				  struct tc_cls_u32_offload *cls_u32)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 
-	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS) ||
-	    chain_index)
+	if (TC_H_MAJ(cls_u32->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_u32->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (cls_u32->command) {
 	case TC_CLSU32_NEW_KNODE:
 	case TC_CLSU32_REPLACE_KNODE:
-		return ixgbe_configure_clsu32(adapter, proto, cls_u32);
+		return ixgbe_configure_clsu32(adapter, cls_u32);
 	case TC_CLSU32_DELETE_KNODE:
 		return ixgbe_delete_clsu32(adapter, cls_u32);
 	case TC_CLSU32_NEW_HNODE:
 	case TC_CLSU32_REPLACE_HNODE:
-		return ixgbe_configure_clsu32_add_hnode(adapter, proto,
-							cls_u32);
+		return ixgbe_configure_clsu32_add_hnode(adapter, cls_u32);
 	case TC_CLSU32_DELETE_HNODE:
 		return ixgbe_configure_clsu32_del_hnode(adapter, cls_u32);
 	default:
@@ -9261,13 +9258,11 @@ static int ixgbe_setup_tc_mqprio(struct net_device *dev,
 }
 
 static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			    u32 handle, u32 chain_index, __be16 proto,
 			    struct tc_to_netdev *tc)
 {
 	switch (type) {
 	case TC_SETUP_CLSU32:
-		return ixgbe_setup_tc_cls_u32(dev, handle, chain_index, proto,
-					      tc->cls_u32);
+		return ixgbe_setup_tc_cls_u32(dev, tc->cls_u32);
 	case TC_SETUP_MQPRIO:
 		return ixgbe_setup_tc_mqprio(dev, tc->mqprio);
 	default:
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 5c33550765ed..e81083e25ba6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -131,7 +131,6 @@ out:
 }
 
 static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			      u32 handle, u32 chain_index, __be16 proto,
 			      struct tc_to_netdev *tc)
 {
 	if (type != TC_SETUP_MQPRIO)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index adf35da74a85..15f2a942962a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3032,18 +3032,17 @@ out:
 }
 
 static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
-				     u32 handle, u32 chain_index, __be16 proto,
 				     struct tc_cls_flower_offload *cls_flower)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS) ||
-	    chain_index)
+	if (TC_H_MAJ(cls_flower->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (cls_flower->command) {
 	case TC_CLSFLOWER_REPLACE:
-		return mlx5e_configure_flower(priv, proto, cls_flower);
+		return mlx5e_configure_flower(priv, cls_flower);
 	case TC_CLSFLOWER_DESTROY:
 		return mlx5e_delete_flower(priv, cls_flower);
 	case TC_CLSFLOWER_STATS:
@@ -3054,13 +3053,11 @@ static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
 }
 
 static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			  u32 handle, u32 chain_index, __be16 proto,
 			  struct tc_to_netdev *tc)
 {
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
-		return mlx5e_setup_tc_cls_flower(dev, handle, chain_index,
-						 proto, tc->cls_flower);
+		return mlx5e_setup_tc_cls_flower(dev, tc->cls_flower);
 	case TC_SETUP_MQPRIO:
 		return mlx5e_setup_tc_mqprio(dev, tc->mqprio);
 	default:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index e6cc642f6d8c..e5cf2e7ae052 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -652,15 +652,13 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
 }
 
 static int mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
-					 u32 handle, u32 chain_index,
-					 __be16 proto,
 					 struct tc_to_netdev *tc)
 {
 	struct tc_cls_flower_offload *cls_flower = tc->cls_flower;
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS) ||
-	    chain_index)
+	if (TC_H_MAJ(cls_flower->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
 	if (cls_flower->egress_dev) {
@@ -668,13 +666,12 @@ static int mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
 
 		dev = mlx5_eswitch_get_uplink_netdev(esw);
 		return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
-						     handle, chain_index,
-						     proto, tc);
+						     tc);
 	}
 
 	switch (cls_flower->command) {
 	case TC_CLSFLOWER_REPLACE:
-		return mlx5e_configure_flower(priv, proto, cls_flower);
+		return mlx5e_configure_flower(priv, cls_flower);
 	case TC_CLSFLOWER_DESTROY:
 		return mlx5e_delete_flower(priv, cls_flower);
 	case TC_CLSFLOWER_STATS:
@@ -685,13 +682,11 @@ static int mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
 }
 
 static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			      u32 handle, u32 chain_index, __be16 proto,
 			      struct tc_to_netdev *tc)
 {
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
-		return mlx5e_rep_setup_tc_cls_flower(dev, handle, chain_index,
-						     proto, tc);
+		return mlx5e_rep_setup_tc_cls_flower(dev, tc);
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 78f50d9f621d..3b10d3df7627 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1939,7 +1939,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	return err;
 }
 
-int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
 			   struct tc_cls_flower_offload *f)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index ecbe30d808ae..5a0f4a487855 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -38,7 +38,7 @@
 int mlx5e_tc_init(struct mlx5e_priv *priv);
 void mlx5e_tc_cleanup(struct mlx5e_priv *priv);
 
-int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
 			   struct tc_cls_flower_offload *f);
 int mlx5e_delete_flower(struct mlx5e_priv *priv,
 			struct tc_cls_flower_offload *f);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index f333d086932d..1ca3204f5543 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1617,11 +1617,11 @@ mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port)
 }
 
 static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
-					  __be16 protocol,
 					  struct tc_cls_matchall_offload *f,
 					  bool ingress)
 {
 	struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
+	__be16 protocol = f->common.protocol;
 	const struct tc_action *a;
 	LIST_HEAD(actions);
 	int err;
@@ -1694,18 +1694,16 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
-					  u32 handle, u32 chain_index,
-					  __be16 proto,
 					  struct tc_cls_matchall_offload *f)
 {
-	bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
+	bool ingress = TC_H_MAJ(f->common.handle) == TC_H_MAJ(TC_H_INGRESS);
 
-	if (chain_index)
+	if (f->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (f->command) {
 	case TC_CLSMATCHALL_REPLACE:
-		return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, proto, f,
+		return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f,
 						      ingress);
 	case TC_CLSMATCHALL_DESTROY:
 		mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, f);
@@ -1717,18 +1715,16 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 
 static int
 mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port,
-			     u32 handle, u32 chain_index, __be16 proto,
 			     struct tc_cls_flower_offload *f)
 {
-	bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
+	bool ingress = TC_H_MAJ(f->common.handle) == TC_H_MAJ(TC_H_INGRESS);
 
-	if (chain_index)
+	if (f->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (f->command) {
 	case TC_CLSFLOWER_REPLACE:
-		return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress,
-					       proto, f);
+		return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f);
 	case TC_CLSFLOWER_DESTROY:
 		mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, f);
 		return 0;
@@ -1740,19 +1736,16 @@ mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			     u32 handle, u32 chain_index, __be16 proto,
 			     struct tc_to_netdev *tc)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 
 	switch (type) {
 	case TC_SETUP_CLSMATCHALL:
-		return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, handle,
-						      chain_index, proto,
+		return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port,
 						      tc->cls_mall);
 	case TC_SETUP_CLSFLOWER:
-		return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, handle,
-						    chain_index, proto,
+		return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port,
 						    tc->cls_flower);
 	default:
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index e848f06e34e6..8452d1db2f3f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -508,7 +508,7 @@ extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops;
 
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
-			    __be16 protocol, struct tc_cls_flower_offload *f);
+			    struct tc_cls_flower_offload *f);
 void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
 			     struct tc_cls_flower_offload *f);
 int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 9be48d2e43ca..021b6c0076c0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -368,7 +368,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 }
 
 int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
-			    __be16 protocol, struct tc_cls_flower_offload *f)
+			    struct tc_cls_flower_offload *f)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct net_device *dev = mlxsw_sp_port->dev;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index d7975dcecb40..152a7abb58ed 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -121,20 +121,22 @@ static void nfp_bpf_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
 }
 
 static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
-			    enum tc_setup_type type, u32 handle, __be16 proto,
+			    enum tc_setup_type type,
 			    struct tc_to_netdev *tc)
 {
+	struct tc_cls_bpf_offload *cls_bpf = tc->cls_bpf;
 	struct nfp_net *nn = netdev_priv(netdev);
 
 	if (type != TC_SETUP_CLSBPF || !nfp_net_ebpf_capable(nn) ||
-	    TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS) ||
-	    proto != htons(ETH_P_ALL))
+	    TC_H_MAJ(cls_bpf->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    cls_bpf->common.protocol != htons(ETH_P_ALL) ||
+	    cls_bpf->common.chain_index)
 		return -EOPNOTSUPP;
 
 	if (nn->dp.bpf_offload_xdp)
 		return -EBUSY;
 
-	return nfp_net_bpf_offload(nn, tc->cls_bpf);
+	return nfp_net_bpf_offload(nn, cls_bpf);
 }
 
 static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 314e6e8ba649..eb94d08e35cf 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -135,8 +135,7 @@ int nfp_flower_metadata_init(struct nfp_app *app);
 void nfp_flower_metadata_cleanup(struct nfp_app *app);
 
 int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
-			enum tc_setup_type type, u32 handle, __be16 proto,
-			struct tc_to_netdev *tc);
+			enum tc_setup_type type, struct tc_to_netdev *tc);
 int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 				  struct nfp_fl_key_ls *key_ls,
 				  struct net_device *netdev,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 58af438a95c1..8197836c650d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -385,13 +385,15 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
 }
 
 int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
-			enum tc_setup_type type, u32 handle, __be16 proto,
-			struct tc_to_netdev *tc)
+			enum tc_setup_type type, struct tc_to_netdev *tc)
 {
+	struct tc_cls_flower_offload *cls_flower = tc->cls_flower;
+
 	if (type != TC_SETUP_CLSFLOWER ||
-	    TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS) ||
-	    !eth_proto_is_802_3(proto))
+	    TC_H_MAJ(cls_flower->common.handle) != TC_H_MAJ(TC_H_INGRESS) ||
+	    !eth_proto_is_802_3(cls_flower->common.protocol) ||
+	    cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
-	return nfp_flower_repr_offload(app, netdev, tc->cls_flower);
+	return nfp_flower_repr_offload(app, netdev, cls_flower);
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index b3b03bb9d907..7a2f950b149c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -109,8 +109,7 @@ struct nfp_app_type {
 	void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb);
 
 	int (*setup_tc)(struct nfp_app *app, struct net_device *netdev,
-			enum tc_setup_type type, u32 handle, __be16 proto,
-			struct tc_to_netdev *tc);
+			enum tc_setup_type type, struct tc_to_netdev *tc);
 	bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn);
 	int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn,
 			   struct bpf_prog *prog);
@@ -240,12 +239,11 @@ static inline bool nfp_app_tc_busy(struct nfp_app *app, struct nfp_net *nn)
 static inline int nfp_app_setup_tc(struct nfp_app *app,
 				   struct net_device *netdev,
 				   enum tc_setup_type type,
-				   u32 handle, __be16 proto,
 				   struct tc_to_netdev *tc)
 {
 	if (!app || !app->type->setup_tc)
 		return -EOPNOTSUPP;
-	return app->type->setup_tc(app, netdev, type, handle, proto, tc);
+	return app->type->setup_tc(app, netdev, type, tc);
 }
 
 static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index 9d776f982352..e8abab2b912e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -89,19 +89,15 @@ const struct switchdev_ops nfp_port_switchdev_ops = {
 };
 
 int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
-		      u32 handle, u32 chain_index, __be16 proto,
 		      struct tc_to_netdev *tc)
 {
 	struct nfp_port *port;
 
-	if (chain_index)
-		return -EOPNOTSUPP;
-
 	port = nfp_port_from_netdev(netdev);
 	if (!port)
 		return -EOPNOTSUPP;
 
-	return nfp_app_setup_tc(port->app, netdev, type, handle, proto, tc);
+	return nfp_app_setup_tc(port->app, netdev, type, tc);
 }
 
 struct nfp_port *
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index 239c5401000c..252f06d4307f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -110,7 +110,6 @@ struct nfp_port {
 extern const struct switchdev_ops nfp_port_switchdev_ops;
 
 int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
-		      u32 handle, u32 chain_index, __be16 proto,
 		      struct tc_to_netdev *tc);
 
 struct nfp_port *nfp_port_from_netdev(struct net_device *netdev);
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index e41a7179bc05..b0c6004db138 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -33,7 +33,6 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
 void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
 int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
-		 u32 handle, u32 chain_index, __be16 proto,
 		 struct tc_to_netdev *tc);
 unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
 extern unsigned int efx_piobuf_size;
diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h
index f3bc67ec1f30..4497511fc914 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.h
+++ b/drivers/net/ethernet/sfc/falcon/efx.h
@@ -33,7 +33,6 @@ netdev_tx_t ef4_hard_start_xmit(struct sk_buff *skb,
 netdev_tx_t ef4_enqueue_skb(struct ef4_tx_queue *tx_queue, struct sk_buff *skb);
 void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index);
 int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
-		 u32 handle, u32 chain_index, __be16 proto,
 		 struct tc_to_netdev *tc);
 unsigned int ef4_tx_max_skb_descs(struct ef4_nic *efx);
 extern bool ef4_separate_tx_channels;
diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c
index 6c4752694c1f..447519ac3fa4 100644
--- a/drivers/net/ethernet/sfc/falcon/tx.c
+++ b/drivers/net/ethernet/sfc/falcon/tx.c
@@ -426,7 +426,6 @@ void ef4_init_tx_queue_core_txq(struct ef4_tx_queue *tx_queue)
 }
 
 int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
-		 u32 handle, u32 chain_index, __be16 proto,
 		 struct tc_to_netdev *ntc)
 {
 	struct ef4_nic *efx = netdev_priv(net_dev);
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 0c08c10d751c..d17af918ac50 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -654,7 +654,6 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
 }
 
 int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
-		 u32 handle, u32 chain_index, __be16 proto,
 		 struct tc_to_netdev *ntc)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index cb21742f6177..14f91b285f00 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1878,7 +1878,6 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb,
 }
 
 static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			  u32 handle, u32 chain_index, __be16 proto,
 			  struct tc_to_netdev *tc)
 {
 	u8 num_tc;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bd49dbaee84e..6e2f7e38cf8e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -977,7 +977,6 @@ struct xfrmdev_ops {
  * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
  * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type,
- *		       u32 handle, u32 chain_index, __be16 protocol,
  *		       struct tc_to_netdev *tc);
  *	Called to setup any 'tc' scheduler, classifier or action on @dev.
  *	This is always called from the stack with the rtnl lock held and netif
@@ -1227,8 +1226,6 @@ struct net_device_ops {
 						   int vf, bool setting);
 	int			(*ndo_setup_tc)(struct net_device *dev,
 						enum tc_setup_type type,
-						u32 handle, u32 chain_index,
-						__be16 protocol,
 						struct tc_to_netdev *tc);
 #if IS_ENABLED(CONFIG_FCOE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 8213acdfdf5a..ffaddf72108e 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -405,6 +405,21 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 }
 #endif /* CONFIG_NET_CLS_IND */
 
+struct tc_cls_common_offload {
+	u32 handle;
+	u32 chain_index;
+	__be16 protocol;
+};
+
+static inline void
+tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
+			   const struct tcf_proto *tp)
+{
+	cls_common->handle = tp->q->handle;
+	cls_common->chain_index = tp->chain->index;
+	cls_common->protocol = tp->protocol;
+}
+
 struct tc_cls_u32_knode {
 	struct tcf_exts *exts;
 	struct tc_u32_sel *sel;
@@ -431,6 +446,7 @@ enum tc_clsu32_command {
 };
 
 struct tc_cls_u32_offload {
+	struct tc_cls_common_offload common;
 	/* knode values */
 	enum tc_clsu32_command command;
 	union {
@@ -497,6 +513,7 @@ enum tc_fl_command {
 };
 
 struct tc_cls_flower_offload {
+	struct tc_cls_common_offload common;
 	enum tc_fl_command command;
 	u32 prio;
 	unsigned long cookie;
@@ -513,6 +530,7 @@ enum tc_matchall_command {
 };
 
 struct tc_cls_matchall_offload {
+	struct tc_cls_common_offload common;
 	enum tc_matchall_command command;
 	struct tcf_exts *exts;
 	unsigned long cookie;
@@ -526,6 +544,7 @@ enum tc_clsbpf_command {
 };
 
 struct tc_cls_bpf_offload {
+	struct tc_cls_common_offload common;
 	enum tc_clsbpf_command command;
 	struct tcf_exts *exts;
 	struct bpf_prog *prog;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index e76d576b941d..5e01e9271619 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -774,12 +774,12 @@ dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p,
 }
 
 static int dsa_slave_add_cls_matchall(struct net_device *dev,
-				      __be16 protocol,
 				      struct tc_cls_matchall_offload *cls,
 				      bool ingress)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_mall_tc_entry *mall_tc_entry;
+	__be16 protocol = cls->common.protocol;
 	struct dsa_switch *ds = p->dp->ds;
 	struct net *net = dev_net(dev);
 	struct dsa_slave_priv *to_p;
@@ -864,18 +864,16 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
 }
 
 static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
-					   u32 handle, u32 chain_index,
-					   __be16 protocol,
 					   struct tc_cls_matchall_offload *cls)
 {
-	bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
+	bool ingress = TC_H_MAJ(cls->common.handle) == TC_H_MAJ(TC_H_INGRESS);
 
-	if (chain_index)
+	if (cls->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (cls->command) {
 	case TC_CLSMATCHALL_REPLACE:
-		return dsa_slave_add_cls_matchall(dev, protocol, cls, ingress);
+		return dsa_slave_add_cls_matchall(dev, cls, ingress);
 	case TC_CLSMATCHALL_DESTROY:
 		dsa_slave_del_cls_matchall(dev, cls);
 		return 0;
@@ -885,13 +883,11 @@ static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
 }
 
 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			      u32 handle, u32 chain_index, __be16 protocol,
 			      struct tc_to_netdev *tc)
 {
 	switch (type) {
 	case TC_SETUP_CLSMATCHALL:
-		return dsa_slave_setup_tc_cls_matchall(dev, handle, chain_index,
-						       protocol, tc->cls_mall);
+		return dsa_slave_setup_tc_cls_matchall(dev, tc->cls_mall);
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index e2bf2753173d..dde8efdcee3b 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -153,6 +153,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 
 	offload.cls_bpf = &bpf_offload;
 
+	tc_cls_common_offload_init(&bpf_offload.common, tp);
 	bpf_offload.command = cmd;
 	bpf_offload.exts = &prog->exts;
 	bpf_offload.prog = prog->filter;
@@ -160,11 +161,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 	bpf_offload.exts_integrated = prog->exts_integrated;
 	bpf_offload.gen_flags = prog->gen_flags;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF,
-					    tp->q->handle,
-					    tp->chain->index,
-					    tp->protocol, &offload);
-
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, &offload);
 	if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
 		prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 52deeed2b7f5..1fdf2889ba9f 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -232,14 +232,14 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 	if (!tc_can_offload(dev, tp))
 		return;
 
+	tc_cls_common_offload_init(&offload.common, tp);
 	offload.command = TC_CLSFLOWER_DESTROY;
 	offload.prio = tp->prio;
 	offload.cookie = (unsigned long)f;
 
 	tc->cls_flower = &offload;
 
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, tp->q->handle,
-				      tp->chain->index, tp->protocol, tc);
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, tc);
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -264,6 +264,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 		f->hw_dev = dev;
 	}
 
+	tc_cls_common_offload_init(&offload.common, tp);
 	offload.command = TC_CLSFLOWER_REPLACE;
 	offload.prio = tp->prio;
 	offload.cookie = (unsigned long)f;
@@ -274,9 +275,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 
 	tc->cls_flower = &offload;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
-					    tp->q->handle, tp->chain->index,
-					    tp->protocol, tc);
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, tc);
 	if (!err)
 		f->flags |= TCA_CLS_FLAGS_IN_HW;
 
@@ -294,6 +293,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 	if (!tc_can_offload(dev, tp))
 		return;
 
+	tc_cls_common_offload_init(&offload.common, tp);
 	offload.command = TC_CLSFLOWER_STATS;
 	offload.prio = tp->prio;
 	offload.cookie = (unsigned long)f;
@@ -301,8 +301,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 
 	tc->cls_flower = &offload;
 
-	dev->netdev_ops->ndo_setup_tc(dev, TC_CLSFLOWER_STATS, tp->q->handle,
-				      tp->chain->index, tp->protocol, tc);
+	dev->netdev_ops->ndo_setup_tc(dev, TC_CLSFLOWER_STATS, tc);
 }
 
 static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index a8853ada22f6..174c700160ca 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -58,14 +58,14 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 	struct tc_cls_matchall_offload mall_offload = {0};
 	int err;
 
+	tc_cls_common_offload_init(&mall_offload.common, tp);
 	offload.cls_mall = &mall_offload;
 	offload.cls_mall->command = TC_CLSMATCHALL_REPLACE;
 	offload.cls_mall->exts = &head->exts;
 	offload.cls_mall->cookie = cookie;
 
 	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL,
-					    tp->q->handle, tp->chain->index,
-					    tp->protocol, &offload);
+					    &offload);
 	if (!err)
 		head->flags |= TCA_CLS_FLAGS_IN_HW;
 
@@ -80,13 +80,13 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
 	struct tc_to_netdev offload;
 	struct tc_cls_matchall_offload mall_offload = {0};
 
+	tc_cls_common_offload_init(&mall_offload.common, tp);
 	offload.cls_mall = &mall_offload;
 	offload.cls_mall->command = TC_CLSMATCHALL_DESTROY;
 	offload.cls_mall->exts = NULL;
 	offload.cls_mall->cookie = cookie;
 
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, tp->q->handle,
-				      tp->chain->index, tp->protocol, &offload);
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, &offload);
 }
 
 static void mall_destroy(struct tcf_proto *tp)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d1bae4cc749f..c0f59c471523 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -437,11 +437,10 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
 	offload.cls_u32 = &u32_offload;
 
 	if (tc_should_offload(dev, tp, 0)) {
+		tc_cls_common_offload_init(&u32_offload.common, tp);
 		offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
 		offload.cls_u32->knode.handle = handle;
-		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32,
-					      tp->q->handle, tp->chain->index,
-					      tp->protocol, &offload);
+		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &offload);
 	}
 }
 
@@ -458,14 +457,13 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 
 	offload.cls_u32 = &u32_offload;
 
+	tc_cls_common_offload_init(&u32_offload.common, tp);
 	offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
 	offload.cls_u32->hnode.divisor = h->divisor;
 	offload.cls_u32->hnode.handle = h->handle;
 	offload.cls_u32->hnode.prio = h->prio;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, tp->q->handle,
-					    tp->chain->index, tp->protocol,
-					    &offload);
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &offload);
 	if (tc_skip_sw(flags))
 		return err;
 
@@ -481,14 +479,13 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
 	offload.cls_u32 = &u32_offload;
 
 	if (tc_should_offload(dev, tp, 0)) {
+		tc_cls_common_offload_init(&u32_offload.common, tp);
 		offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
 		offload.cls_u32->hnode.divisor = h->divisor;
 		offload.cls_u32->hnode.handle = h->handle;
 		offload.cls_u32->hnode.prio = h->prio;
 
-		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32,
-					      tp->q->handle, tp->chain->index,
-					      tp->protocol, &offload);
+		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &offload);
 	}
 }
 
@@ -505,6 +502,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 	if (!tc_should_offload(dev, tp, flags))
 		return tc_skip_sw(flags) ? -EINVAL : 0;
 
+	tc_cls_common_offload_init(&u32_offload.common, tp);
 	offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
 	offload.cls_u32->knode.handle = n->handle;
 	offload.cls_u32->knode.fshift = n->fshift;
@@ -520,9 +518,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 	if (n->ht_down)
 		offload.cls_u32->knode.link_handle = n->ht_down->handle;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, tp->q->handle,
-					    tp->chain->index, tp->protocol,
-					    &offload);
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &offload);
 
 	if (!err)
 		n->flags |= TCA_CLS_FLAGS_IN_HW;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 329610ce4dfe..09b577dde49c 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -42,8 +42,7 @@ static void mqprio_destroy(struct Qdisc *sch)
 		struct tc_mqprio_qopt offload = { 0 };
 		struct tc_to_netdev tc = { { .mqprio = &offload } };
 
-		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
-					      sch->handle, 0, 0, &tc);
+		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &tc);
 	} else {
 		netdev_set_num_tc(dev, 0);
 	}
@@ -151,8 +150,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 		struct tc_mqprio_qopt offload = *qopt;
 		struct tc_to_netdev tc = { { .mqprio = &offload } };
 
-		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
-						    sch->handle, 0, 0, &tc);
+		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &tc);
 		if (err)
 			return err;
 
-- 
cgit v1.2.3


From d7c1c8d2e53be974b5c72e31d7d35f6d9737fe84 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 7 Aug 2017 10:15:30 +0200
Subject: net: sched: move prio into cls_common

prio is not cls_flower specific, but it is meaningful for all
classifiers. Seems that only mlxsw cares about the value. Obviously,
cls offload in other drivers is broken.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 2 +-
 include/net/pkt_cls.h                                 | 3 ++-
 net/sched/cls_flower.c                                | 3 ---
 3 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 021b6c0076c0..95428b41c50f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -270,7 +270,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 		return -EOPNOTSUPP;
 	}
 
-	mlxsw_sp_acl_rulei_priority(rulei, f->prio);
+	mlxsw_sp_acl_rulei_priority(rulei, f->common.prio);
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
 		struct flow_dissector_key_control *key =
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ffaddf72108e..572083af02ac 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -409,6 +409,7 @@ struct tc_cls_common_offload {
 	u32 handle;
 	u32 chain_index;
 	__be16 protocol;
+	u32 prio;
 };
 
 static inline void
@@ -418,6 +419,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
 	cls_common->handle = tp->q->handle;
 	cls_common->chain_index = tp->chain->index;
 	cls_common->protocol = tp->protocol;
+	cls_common->prio = tp->prio;
 }
 
 struct tc_cls_u32_knode {
@@ -515,7 +517,6 @@ enum tc_fl_command {
 struct tc_cls_flower_offload {
 	struct tc_cls_common_offload common;
 	enum tc_fl_command command;
-	u32 prio;
 	unsigned long cookie;
 	struct flow_dissector *dissector;
 	struct fl_flow_key *mask;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1fdf2889ba9f..ccdf2f5014ca 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -234,7 +234,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 
 	tc_cls_common_offload_init(&offload.common, tp);
 	offload.command = TC_CLSFLOWER_DESTROY;
-	offload.prio = tp->prio;
 	offload.cookie = (unsigned long)f;
 
 	tc->cls_flower = &offload;
@@ -266,7 +265,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 
 	tc_cls_common_offload_init(&offload.common, tp);
 	offload.command = TC_CLSFLOWER_REPLACE;
-	offload.prio = tp->prio;
 	offload.cookie = (unsigned long)f;
 	offload.dissector = dissector;
 	offload.mask = mask;
@@ -295,7 +293,6 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 
 	tc_cls_common_offload_init(&offload.common, tp);
 	offload.command = TC_CLSFLOWER_STATS;
-	offload.prio = tp->prio;
 	offload.cookie = (unsigned long)f;
 	offload.exts = &f->exts;
 
-- 
cgit v1.2.3


From fb74c27735f0a34e76dbf1972084e984ad2ea145 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 7 Aug 2017 08:44:16 -0700
Subject: net: ipv4: add second dif to udp socket lookups

Add a second device index, sdif, to udp socket lookups. sdif is the
index for ingress devices enslaved to an l3mdev. It allows the lookups
to consider the enslaved device as well as the L3 domain when searching
for a socket.

Early demux lookups are handled in the next patch as part of INET_MATCH
changes.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h    | 10 +++++++++
 include/net/udp.h   |  2 +-
 net/ipv4/udp.c      | 58 +++++++++++++++++++++++++++++++----------------------
 net/ipv4/udp_diag.c |  6 +++---
 4 files changed, 48 insertions(+), 28 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 9e59dcf1787a..39db596eb89f 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -78,6 +78,16 @@ struct ipcm_cookie {
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
 #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
 
+/* return enslaved device index if relevant */
+static inline int inet_sdif(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+	if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
+		return IPCB(skb)->iif;
+#endif
+	return 0;
+}
+
 struct ip_ra_chain {
 	struct ip_ra_chain __rcu *next;
 	struct sock		*sk;
diff --git a/include/net/udp.h b/include/net/udp.h
index cc8036987dcb..826c713d5a48 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -287,7 +287,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 			     __be32 daddr, __be16 dport, int dif);
 struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
-			       __be32 daddr, __be16 dport, int dif,
+			       __be32 daddr, __be16 dport, int dif, int sdif,
 			       struct udp_table *tbl, struct sk_buff *skb);
 struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
 				 __be16 sport, __be16 dport);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 38bca2c4897d..fe14429e4a6c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -380,8 +380,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
 
 static int compute_score(struct sock *sk, struct net *net,
 			 __be32 saddr, __be16 sport,
-			 __be32 daddr, unsigned short hnum, int dif,
-			 bool exact_dif)
+			 __be32 daddr, unsigned short hnum,
+			 int dif, int sdif, bool exact_dif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -413,10 +413,15 @@ static int compute_score(struct sock *sk, struct net *net,
 	}
 
 	if (sk->sk_bound_dev_if || exact_dif) {
-		if (sk->sk_bound_dev_if != dif)
+		bool dev_match = (sk->sk_bound_dev_if == dif ||
+				  sk->sk_bound_dev_if == sdif);
+
+		if (exact_dif && !dev_match)
 			return -1;
-		score += 4;
+		if (sk->sk_bound_dev_if && dev_match)
+			score += 4;
 	}
+
 	if (sk->sk_incoming_cpu == raw_smp_processor_id())
 		score++;
 	return score;
@@ -436,10 +441,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
 
 /* called with rcu_read_lock() */
 static struct sock *udp4_lib_lookup2(struct net *net,
-		__be32 saddr, __be16 sport,
-		__be32 daddr, unsigned int hnum, int dif, bool exact_dif,
-		struct udp_hslot *hslot2,
-		struct sk_buff *skb)
+				     __be32 saddr, __be16 sport,
+				     __be32 daddr, unsigned int hnum,
+				     int dif, int sdif, bool exact_dif,
+				     struct udp_hslot *hslot2,
+				     struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	int score, badness, matches = 0, reuseport = 0;
@@ -449,7 +455,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 	badness = 0;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		score = compute_score(sk, net, saddr, sport,
-				      daddr, hnum, dif, exact_dif);
+				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -477,8 +483,8 @@ static struct sock *udp4_lib_lookup2(struct net *net,
  * harder than this. -DaveM
  */
 struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
-		__be16 sport, __be32 daddr, __be16 dport,
-		int dif, struct udp_table *udptable, struct sk_buff *skb)
+		__be16 sport, __be32 daddr, __be16 dport, int dif,
+		int sdif, struct udp_table *udptable, struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	unsigned short hnum = ntohs(dport);
@@ -496,7 +502,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 			goto begin;
 
 		result = udp4_lib_lookup2(net, saddr, sport,
-					  daddr, hnum, dif,
+					  daddr, hnum, dif, sdif,
 					  exact_dif, hslot2, skb);
 		if (!result) {
 			unsigned int old_slot2 = slot2;
@@ -511,7 +517,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 				goto begin;
 
 			result = udp4_lib_lookup2(net, saddr, sport,
-						  daddr, hnum, dif,
+						  daddr, hnum, dif, sdif,
 						  exact_dif, hslot2, skb);
 		}
 		return result;
@@ -521,7 +527,7 @@ begin:
 	badness = 0;
 	sk_for_each_rcu(sk, &hslot->head) {
 		score = compute_score(sk, net, saddr, sport,
-				      daddr, hnum, dif, exact_dif);
+				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -554,7 +560,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
 
 	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
 				 iph->daddr, dport, inet_iif(skb),
-				 udptable, skb);
+				 inet_sdif(skb), udptable, skb);
 }
 
 struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
@@ -576,7 +582,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 	struct sock *sk;
 
 	sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
-			       dif, &udp_table, NULL);
+			       dif, 0, &udp_table, NULL);
 	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
@@ -587,7 +593,7 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup);
 static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
 				       __be16 loc_port, __be32 loc_addr,
 				       __be16 rmt_port, __be32 rmt_addr,
-				       int dif, unsigned short hnum)
+				       int dif, int sdif, unsigned short hnum)
 {
 	struct inet_sock *inet = inet_sk(sk);
 
@@ -597,7 +603,8 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
 	    (inet->inet_dport != rmt_port && inet->inet_dport) ||
 	    (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
 	    ipv6_only_sock(sk) ||
-	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+	     sk->sk_bound_dev_if != sdif))
 		return false;
 	if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif))
 		return false;
@@ -628,8 +635,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 	struct net *net = dev_net(skb->dev);
 
 	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
-			iph->saddr, uh->source, skb->dev->ifindex, udptable,
-			NULL);
+			       iph->saddr, uh->source, skb->dev->ifindex, 0,
+			       udptable, NULL);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 		return;	/* No socket for error */
@@ -1953,6 +1960,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
 	unsigned int offset = offsetof(typeof(*sk), sk_node);
 	int dif = skb->dev->ifindex;
+	int sdif = inet_sdif(skb);
 	struct hlist_node *node;
 	struct sk_buff *nskb;
 
@@ -1967,7 +1975,7 @@ start_lookup:
 
 	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
 		if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
-					 uh->source, saddr, dif, hnum))
+					 uh->source, saddr, dif, sdif, hnum))
 			continue;
 
 		if (!first) {
@@ -2157,7 +2165,7 @@ drop:
 static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
 						  __be16 loc_port, __be32 loc_addr,
 						  __be16 rmt_port, __be32 rmt_addr,
-						  int dif)
+						  int dif, int sdif)
 {
 	struct sock *sk, *result;
 	unsigned short hnum = ntohs(loc_port);
@@ -2171,7 +2179,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
 	result = NULL;
 	sk_for_each_rcu(sk, &hslot->head) {
 		if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
-					rmt_port, rmt_addr, dif, hnum)) {
+					rmt_port, rmt_addr, dif, sdif, hnum)) {
 			if (result)
 				return NULL;
 			result = sk;
@@ -2216,6 +2224,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 	struct sock *sk = NULL;
 	struct dst_entry *dst;
 	int dif = skb->dev->ifindex;
+	int sdif = inet_sdif(skb);
 	int ours;
 
 	/* validate the packet */
@@ -2241,7 +2250,8 @@ void udp_v4_early_demux(struct sk_buff *skb)
 		}
 
 		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
-						   uh->source, iph->saddr, dif);
+						   uh->source, iph->saddr,
+						   dif, sdif);
 	} else if (skb->pkt_type == PACKET_HOST) {
 		sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
 					     uh->source, iph->saddr, dif);
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 4515836d2a3a..1f07fe109535 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -45,7 +45,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 		sk = __udp4_lib_lookup(net,
 				req->id.idiag_src[0], req->id.idiag_sport,
 				req->id.idiag_dst[0], req->id.idiag_dport,
-				req->id.idiag_if, tbl, NULL);
+				req->id.idiag_if, 0, tbl, NULL);
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (req->sdiag_family == AF_INET6)
 		sk = __udp6_lib_lookup(net,
@@ -182,7 +182,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
 		sk = __udp4_lib_lookup(net,
 				req->id.idiag_dst[0], req->id.idiag_dport,
 				req->id.idiag_src[0], req->id.idiag_sport,
-				req->id.idiag_if, tbl, NULL);
+				req->id.idiag_if, 0, tbl, NULL);
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (req->sdiag_family == AF_INET6) {
 		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
@@ -190,7 +190,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
 			sk = __udp4_lib_lookup(net,
 					req->id.idiag_dst[3], req->id.idiag_dport,
 					req->id.idiag_src[3], req->id.idiag_sport,
-					req->id.idiag_if, tbl, NULL);
+					req->id.idiag_if, 0, tbl, NULL);
 
 		else
 			sk = __udp6_lib_lookup(net,
-- 
cgit v1.2.3


From 3fa6f616a7a4d0bdf4d877d530456d8a5c3b109b Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 7 Aug 2017 08:44:17 -0700
Subject: net: ipv4: add second dif to inet socket lookups

Add a second device index, sdif, to inet socket lookups. sdif is the
index for ingress devices enslaved to an l3mdev. It allows the lookups
to consider the enslaved device as well as the L3 domain when searching
for a socket.

TCP moves the data in the cb. Prior to tcp_v4_rcv (e.g., early demux) the
ingress index is obtained from IPCB using inet_sdif and after the cb move
in  tcp_v4_rcv the tcp_v4_sdif helper is used.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 31 +++++++++++++++++--------------
 include/net/tcp.h             | 10 ++++++++++
 net/dccp/ipv4.c               |  4 ++--
 net/ipv4/inet_hashtables.c    | 27 +++++++++++++++++----------
 net/ipv4/tcp_ipv4.c           | 13 ++++++++-----
 net/ipv4/udp.c                |  6 +++---
 net/netfilter/xt_TPROXY.c     |  2 +-
 7 files changed, 58 insertions(+), 35 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5026b1f08bb8..2dbbbff5e1e3 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -221,16 +221,16 @@ struct sock *__inet_lookup_listener(struct net *net,
 				    const __be32 saddr, const __be16 sport,
 				    const __be32 daddr,
 				    const unsigned short hnum,
-				    const int dif);
+				    const int dif, const int sdif);
 
 static inline struct sock *inet_lookup_listener(struct net *net,
 		struct inet_hashinfo *hashinfo,
 		struct sk_buff *skb, int doff,
 		__be32 saddr, __be16 sport,
-		__be32 daddr, __be16 dport, int dif)
+		__be32 daddr, __be16 dport, int dif, int sdif)
 {
 	return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
-				      daddr, ntohs(dport), dif);
+				      daddr, ntohs(dport), dif, sdif);
 }
 
 /* Socket demux engine toys. */
@@ -262,22 +262,24 @@ static inline struct sock *inet_lookup_listener(struct net *net,
 				   (((__force __u64)(__be32)(__daddr)) << 32) | \
 				   ((__force __u64)(__be32)(__saddr)))
 #endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)	\
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
 	(((__sk)->sk_portpair == (__ports))			&&	\
 	 ((__sk)->sk_addrpair == (__cookie))			&&	\
 	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif))) 		&& 	\
+	   ((__sk)->sk_bound_dev_if == (__dif))			||	\
+	   ((__sk)->sk_bound_dev_if == (__sdif)))		&&	\
 	 net_eq(sock_net(__sk), (__net)))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const int __name __deprecated __attribute__((unused))
 
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
 	(((__sk)->sk_portpair == (__ports))		&&		\
 	 ((__sk)->sk_daddr	== (__saddr))		&&		\
 	 ((__sk)->sk_rcv_saddr	== (__daddr))		&&		\
 	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif))) 	&&		\
+	   ((__sk)->sk_bound_dev_if == (__dif))		||		\
+	   ((__sk)->sk_bound_dev_if == (__sdif)))	&&		\
 	 net_eq(sock_net(__sk), (__net)))
 #endif /* 64-bit arch */
 
@@ -288,7 +290,7 @@ struct sock *__inet_lookup_established(struct net *net,
 				       struct inet_hashinfo *hashinfo,
 				       const __be32 saddr, const __be16 sport,
 				       const __be32 daddr, const u16 hnum,
-				       const int dif);
+				       const int dif, const int sdif);
 
 static inline struct sock *
 	inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
@@ -297,7 +299,7 @@ static inline struct sock *
 				const int dif)
 {
 	return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
-					 ntohs(dport), dif);
+					 ntohs(dport), dif, 0);
 }
 
 static inline struct sock *__inet_lookup(struct net *net,
@@ -305,20 +307,20 @@ static inline struct sock *__inet_lookup(struct net *net,
 					 struct sk_buff *skb, int doff,
 					 const __be32 saddr, const __be16 sport,
 					 const __be32 daddr, const __be16 dport,
-					 const int dif,
+					 const int dif, const int sdif,
 					 bool *refcounted)
 {
 	u16 hnum = ntohs(dport);
 	struct sock *sk;
 
 	sk = __inet_lookup_established(net, hashinfo, saddr, sport,
-				       daddr, hnum, dif);
+				       daddr, hnum, dif, sdif);
 	*refcounted = true;
 	if (sk)
 		return sk;
 	*refcounted = false;
 	return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
-				      sport, daddr, hnum, dif);
+				      sport, daddr, hnum, dif, sdif);
 }
 
 static inline struct sock *inet_lookup(struct net *net,
@@ -332,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net,
 	bool refcounted;
 
 	sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
-			   dport, dif, &refcounted);
+			   dport, dif, 0, &refcounted);
 
 	if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
@@ -344,6 +346,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
 					     int doff,
 					     const __be16 sport,
 					     const __be16 dport,
+					     const int sdif,
 					     bool *refcounted)
 {
 	struct sock *sk = skb_steal_sock(skb);
@@ -355,7 +358,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
 
 	return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
 			     doff, iph->saddr, sport,
-			     iph->daddr, dport, inet_iif(skb),
+			     iph->daddr, dport, inet_iif(skb), sdif,
 			     refcounted);
 }
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5173fecde495..2b89f1ab8552 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -840,6 +840,16 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
 	return false;
 }
 
+/* TCP_SKB_CB reference means this can not be used from early demux */
+static inline int tcp_v4_sdif(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+	if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
+		return TCP_SKB_CB(skb)->header.h4.iif;
+#endif
+	return 0;
+}
+
 /* Due to TSO, an SKB can be composed of multiple actual
  * packets.  To keep these tracked properly, we use this.
  */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 1b202f16531f..001c08696334 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	sk = __inet_lookup_established(net, &dccp_hashinfo,
 				       iph->daddr, dh->dccph_dport,
 				       iph->saddr, ntohs(dh->dccph_sport),
-				       inet_iif(skb));
+				       inet_iif(skb), 0);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 		return;
@@ -804,7 +804,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 
 lookup:
 	sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
-			       dh->dccph_sport, dh->dccph_dport, &refcounted);
+			       dh->dccph_sport, dh->dccph_dport, 0, &refcounted);
 	if (!sk) {
 		dccp_pr_debug("failed to look up flow ID in table and "
 			      "get corresponding socket\n");
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2e3389d614d1..597bb4cfe805 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
 static inline int compute_score(struct sock *sk, struct net *net,
 				const unsigned short hnum, const __be32 daddr,
-				const int dif, bool exact_dif)
+				const int dif, const int sdif, bool exact_dif)
 {
 	int score = -1;
 	struct inet_sock *inet = inet_sk(sk);
@@ -185,9 +185,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
 			score += 4;
 		}
 		if (sk->sk_bound_dev_if || exact_dif) {
-			if (sk->sk_bound_dev_if != dif)
+			bool dev_match = (sk->sk_bound_dev_if == dif ||
+					  sk->sk_bound_dev_if == sdif);
+
+			if (exact_dif && !dev_match)
 				return -1;
-			score += 4;
+			if (sk->sk_bound_dev_if && dev_match)
+				score += 4;
 		}
 		if (sk->sk_incoming_cpu == raw_smp_processor_id())
 			score++;
@@ -208,7 +212,7 @@ struct sock *__inet_lookup_listener(struct net *net,
 				    struct sk_buff *skb, int doff,
 				    const __be32 saddr, __be16 sport,
 				    const __be32 daddr, const unsigned short hnum,
-				    const int dif)
+				    const int dif, const int sdif)
 {
 	unsigned int hash = inet_lhashfn(net, hnum);
 	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -218,7 +222,8 @@ struct sock *__inet_lookup_listener(struct net *net,
 	u32 phash = 0;
 
 	sk_for_each_rcu(sk, &ilb->head) {
-		score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+		score = compute_score(sk, net, hnum, daddr,
+				      dif, sdif, exact_dif);
 		if (score > hiscore) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -268,7 +273,7 @@ struct sock *__inet_lookup_established(struct net *net,
 				  struct inet_hashinfo *hashinfo,
 				  const __be32 saddr, const __be16 sport,
 				  const __be32 daddr, const u16 hnum,
-				  const int dif)
+				  const int dif, const int sdif)
 {
 	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
@@ -286,11 +291,12 @@ begin:
 		if (sk->sk_hash != hash)
 			continue;
 		if (likely(INET_MATCH(sk, net, acookie,
-				      saddr, daddr, ports, dif))) {
+				      saddr, daddr, ports, dif, sdif))) {
 			if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
 				goto out;
 			if (unlikely(!INET_MATCH(sk, net, acookie,
-						 saddr, daddr, ports, dif))) {
+						 saddr, daddr, ports,
+						 dif, sdif))) {
 				sock_gen_put(sk);
 				goto begin;
 			}
@@ -321,9 +327,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	__be32 daddr = inet->inet_rcv_saddr;
 	__be32 saddr = inet->inet_daddr;
 	int dif = sk->sk_bound_dev_if;
+	struct net *net = sock_net(sk);
+	int sdif = l3mdev_master_ifindex_by_index(net, dif);
 	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
-	struct net *net = sock_net(sk);
 	unsigned int hash = inet_ehashfn(net, daddr, lport,
 					 saddr, inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -339,7 +346,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 			continue;
 
 		if (likely(INET_MATCH(sk2, net, acookie,
-					 saddr, daddr, ports, dif))) {
+					 saddr, daddr, ports, dif, sdif))) {
 			if (sk2->sk_state == TCP_TIME_WAIT) {
 				tw = inet_twsk(sk2);
 				if (twsk_unique(sk, sk2, twp))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5f708c85110e..c8784ab37852 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -383,7 +383,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
 				       th->dest, iph->saddr, ntohs(th->source),
-				       inet_iif(icmp_skb));
+				       inet_iif(icmp_skb), 0);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 		return;
@@ -659,7 +659,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
 					     ip_hdr(skb)->saddr,
 					     th->source, ip_hdr(skb)->daddr,
-					     ntohs(th->source), inet_iif(skb));
+					     ntohs(th->source), inet_iif(skb),
+					     tcp_v4_sdif(skb));
 		/* don't send rst if it can't find key */
 		if (!sk1)
 			goto out;
@@ -1523,7 +1524,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
 				       iph->saddr, th->source,
 				       iph->daddr, ntohs(th->dest),
-				       skb->skb_iif);
+				       skb->skb_iif, inet_sdif(skb));
 	if (sk) {
 		skb->sk = sk;
 		skb->destructor = sock_edemux;
@@ -1588,6 +1589,7 @@ EXPORT_SYMBOL(tcp_filter);
 int tcp_v4_rcv(struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
+	int sdif = inet_sdif(skb);
 	const struct iphdr *iph;
 	const struct tcphdr *th;
 	bool refcounted;
@@ -1638,7 +1640,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 lookup:
 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
-			       th->dest, &refcounted);
+			       th->dest, sdif, &refcounted);
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1766,7 +1768,8 @@ do_time_wait:
 							__tcp_hdrlen(th),
 							iph->saddr, th->source,
 							iph->daddr, th->dest,
-							inet_iif(skb));
+							inet_iif(skb),
+							sdif);
 		if (sk2) {
 			inet_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fe14429e4a6c..99f25bfec606 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2196,7 +2196,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
 static struct sock *__udp4_lib_demux_lookup(struct net *net,
 					    __be16 loc_port, __be32 loc_addr,
 					    __be16 rmt_port, __be32 rmt_addr,
-					    int dif)
+					    int dif, int sdif)
 {
 	unsigned short hnum = ntohs(loc_port);
 	unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
@@ -2208,7 +2208,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
 
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		if (INET_MATCH(sk, net, acookie, rmt_addr,
-			       loc_addr, ports, dif))
+			       loc_addr, ports, dif, sdif))
 			return sk;
 		/* Only check first socket in chain */
 		break;
@@ -2254,7 +2254,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 						   dif, sdif);
 	} else if (skb->pkt_type == PACKET_HOST) {
 		sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
-					     uh->source, iph->saddr, dif);
+					     uh->source, iph->saddr, dif, sdif);
 	}
 
 	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d767e35fff6b..94fb0fd0c667 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -125,7 +125,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
 						      __tcp_hdrlen(tcph),
 						    saddr, sport,
 						    daddr, dport,
-						    in->ifindex);
+						    in->ifindex, 0);
 
 			if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 				sk = NULL;
-- 
cgit v1.2.3


From 67359930e185c491b47cb958d5f1d6c1af4598a2 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 7 Aug 2017 08:44:18 -0700
Subject: net: ipv4: add second dif to raw socket lookups

Add a second device index, sdif, to raw socket lookups. sdif is the
index for ingress devices enslaved to an l3mdev. It allows the lookups
to consider the enslaved device as well as the L3 domain when searching
for a socket.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/raw.h   |  2 +-
 net/ipv4/raw.c      | 16 +++++++++++-----
 net/ipv4/raw_diag.c |  2 +-
 3 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/raw.h b/include/net/raw.h
index 57c33dd22ec4..99d26d0c4a19 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -26,7 +26,7 @@ extern struct proto raw_prot;
 extern struct raw_hashinfo raw_v4_hashinfo;
 struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
 			     unsigned short num, __be32 raddr,
-			     __be32 laddr, int dif);
+			     __be32 laddr, int dif, int sdif);
 
 int raw_abort(struct sock *sk, int err);
 void raw_icmp_error(struct sk_buff *, int, u32);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index b0bb5d0a30bd..2726aecf224b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -122,7 +122,8 @@ void raw_unhash_sk(struct sock *sk)
 EXPORT_SYMBOL_GPL(raw_unhash_sk);
 
 struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
-		unsigned short num, __be32 raddr, __be32 laddr, int dif)
+			     unsigned short num, __be32 raddr, __be32 laddr,
+			     int dif, int sdif)
 {
 	sk_for_each_from(sk) {
 		struct inet_sock *inet = inet_sk(sk);
@@ -130,7 +131,8 @@ struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
 		if (net_eq(sock_net(sk), net) && inet->inet_num == num	&&
 		    !(inet->inet_daddr && inet->inet_daddr != raddr) 	&&
 		    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
-		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+		      sk->sk_bound_dev_if != sdif))
 			goto found; /* gotcha */
 	}
 	sk = NULL;
@@ -171,6 +173,7 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
  */
 static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
 {
+	int sdif = inet_sdif(skb);
 	struct sock *sk;
 	struct hlist_head *head;
 	int delivered = 0;
@@ -184,7 +187,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
 	net = dev_net(skb->dev);
 	sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
 			     iph->saddr, iph->daddr,
-			     skb->dev->ifindex);
+			     skb->dev->ifindex, sdif);
 
 	while (sk) {
 		delivered = 1;
@@ -199,7 +202,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
 		}
 		sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
 				     iph->saddr, iph->daddr,
-				     skb->dev->ifindex);
+				     skb->dev->ifindex, sdif);
 	}
 out:
 	read_unlock(&raw_v4_hashinfo.lock);
@@ -297,12 +300,15 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
 	read_lock(&raw_v4_hashinfo.lock);
 	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
 	if (raw_sk) {
+		int dif = skb->dev->ifindex;
+		int sdif = inet_sdif(skb);
+
 		iph = (const struct iphdr *)skb->data;
 		net = dev_net(skb->dev);
 
 		while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
 						iph->daddr, iph->saddr,
-						skb->dev->ifindex)) != NULL) {
+						dif, sdif)) != NULL) {
 			raw_err(raw_sk, skb, info);
 			raw_sk = sk_next(raw_sk);
 			iph = (const struct iphdr *)skb->data;
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index e1a51ca68d23..c600d3c71d4d 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -46,7 +46,7 @@ static struct sock *raw_lookup(struct net *net, struct sock *from,
 		sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
 				     r->id.idiag_dst[0],
 				     r->id.idiag_src[0],
-				     r->id.idiag_if);
+				     r->id.idiag_if, 0);
 #if IS_ENABLED(CONFIG_IPV6)
 	else
 		sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
-- 
cgit v1.2.3


From 1801b570dd2ae50b90231f283e79a9a94fbe7875 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 7 Aug 2017 08:44:20 -0700
Subject: net: ipv6: add second dif to udp socket lookups

Add a second device index, sdif, to udp socket lookups. sdif is the
index for ingress devices enslaved to an l3mdev. It allows the lookups
to consider the enslaved device as well as the L3 domain when searching
for a socket.

Early demux lookups are handled in the next patch as part of INET_MATCH
changes.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 10 ++++++++++
 include/net/udp.h    |  2 +-
 net/ipv4/udp_diag.c  |  4 ++--
 net/ipv6/udp.c       | 40 ++++++++++++++++++++++------------------
 4 files changed, 35 insertions(+), 21 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 474d6bbc158c..ac2da4e11d5e 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -158,6 +158,16 @@ static inline bool inet6_is_jumbogram(const struct sk_buff *skb)
 	return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM);
 }
 
+/* can not be used in TCP layer after tcp_v6_fill_cb */
+static inline int inet6_sdif(const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+	if (skb && ipv6_l3mdev_skb(IP6CB(skb)->flags))
+		return IP6CB(skb)->iif;
+#endif
+	return 0;
+}
+
 /* can not be used in TCP layer after tcp_v6_fill_cb */
 static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
 {
diff --git a/include/net/udp.h b/include/net/udp.h
index 826c713d5a48..20dcdca4e85c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -298,7 +298,7 @@ struct sock *udp6_lib_lookup(struct net *net,
 struct sock *__udp6_lib_lookup(struct net *net,
 			       const struct in6_addr *saddr, __be16 sport,
 			       const struct in6_addr *daddr, __be16 dport,
-			       int dif, struct udp_table *tbl,
+			       int dif, int sdif, struct udp_table *tbl,
 			       struct sk_buff *skb);
 struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 				 __be16 sport, __be16 dport);
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 1f07fe109535..d0390d844ac8 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -53,7 +53,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 				req->id.idiag_sport,
 				(struct in6_addr *)req->id.idiag_dst,
 				req->id.idiag_dport,
-				req->id.idiag_if, tbl, NULL);
+				req->id.idiag_if, 0, tbl, NULL);
 #endif
 	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
@@ -198,7 +198,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
 					req->id.idiag_dport,
 					(struct in6_addr *)req->id.idiag_src,
 					req->id.idiag_sport,
-					req->id.idiag_if, tbl, NULL);
+					req->id.idiag_if, 0, tbl, NULL);
 	}
 #endif
 	else {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 578142b7ca3e..d96a877798a7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -129,7 +129,7 @@ static void udp_v6_rehash(struct sock *sk)
 static int compute_score(struct sock *sk, struct net *net,
 			 const struct in6_addr *saddr, __be16 sport,
 			 const struct in6_addr *daddr, unsigned short hnum,
-			 int dif, bool exact_dif)
+			 int dif, int sdif, bool exact_dif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -161,9 +161,13 @@ static int compute_score(struct sock *sk, struct net *net,
 	}
 
 	if (sk->sk_bound_dev_if || exact_dif) {
-		if (sk->sk_bound_dev_if != dif)
+		bool dev_match = (sk->sk_bound_dev_if == dif ||
+				  sk->sk_bound_dev_if == sdif);
+
+		if (exact_dif && !dev_match)
 			return -1;
-		score++;
+		if (sk->sk_bound_dev_if && dev_match)
+			score++;
 	}
 
 	if (sk->sk_incoming_cpu == raw_smp_processor_id())
@@ -175,9 +179,9 @@ static int compute_score(struct sock *sk, struct net *net,
 /* called with rcu_read_lock() */
 static struct sock *udp6_lib_lookup2(struct net *net,
 		const struct in6_addr *saddr, __be16 sport,
-		const struct in6_addr *daddr, unsigned int hnum, int dif,
-		bool exact_dif, struct udp_hslot *hslot2,
-		struct sk_buff *skb)
+		const struct in6_addr *daddr, unsigned int hnum,
+		int dif, int sdif, bool exact_dif,
+		struct udp_hslot *hslot2, struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	int score, badness, matches = 0, reuseport = 0;
@@ -187,7 +191,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 	badness = -1;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		score = compute_score(sk, net, saddr, sport,
-				      daddr, hnum, dif, exact_dif);
+				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -214,10 +218,10 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 
 /* rcu_read_lock() must be held */
 struct sock *__udp6_lib_lookup(struct net *net,
-				      const struct in6_addr *saddr, __be16 sport,
-				      const struct in6_addr *daddr, __be16 dport,
-				      int dif, struct udp_table *udptable,
-				      struct sk_buff *skb)
+			       const struct in6_addr *saddr, __be16 sport,
+			       const struct in6_addr *daddr, __be16 dport,
+			       int dif, int sdif, struct udp_table *udptable,
+			       struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	unsigned short hnum = ntohs(dport);
@@ -235,7 +239,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
 			goto begin;
 
 		result = udp6_lib_lookup2(net, saddr, sport,
-					  daddr, hnum, dif, exact_dif,
+					  daddr, hnum, dif, sdif, exact_dif,
 					  hslot2, skb);
 		if (!result) {
 			unsigned int old_slot2 = slot2;
@@ -250,7 +254,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
 				goto begin;
 
 			result = udp6_lib_lookup2(net, saddr, sport,
-						  daddr, hnum, dif,
+						  daddr, hnum, dif, sdif,
 						  exact_dif, hslot2,
 						  skb);
 		}
@@ -261,7 +265,7 @@ begin:
 	badness = -1;
 	sk_for_each_rcu(sk, &hslot->head) {
 		score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
-				      exact_dif);
+				      sdif, exact_dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -294,7 +298,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
 
 	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
 				 &iph->daddr, dport, inet6_iif(skb),
-				 udptable, skb);
+				 inet6_sdif(skb), udptable, skb);
 }
 
 struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
@@ -304,7 +308,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 
 	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
 				 &iph->daddr, dport, inet6_iif(skb),
-				 &udp_table, skb);
+				 inet6_sdif(skb), &udp_table, skb);
 }
 EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
 
@@ -320,7 +324,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
 	struct sock *sk;
 
 	sk =  __udp6_lib_lookup(net, saddr, sport, daddr, dport,
-				dif, &udp_table, NULL);
+				dif, 0, &udp_table, NULL);
 	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
@@ -501,7 +505,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct net *net = dev_net(skb->dev);
 
 	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
-			       inet6_iif(skb), udptable, skb);
+			       inet6_iif(skb), 0, udptable, skb);
 	if (!sk) {
 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
 				  ICMP6_MIB_INERRORS);
-- 
cgit v1.2.3


From 4297a0ef085729af98adab9131d128c576ed3044 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 7 Aug 2017 08:44:21 -0700
Subject: net: ipv6: add second dif to inet6 socket lookups

Add a second device index, sdif, to inet6 socket lookups. sdif is the
index for ingress devices enslaved to an l3mdev. It allows the lookups
to consider the enslaved device as well as the L3 domain when searching
for a socket.

TCP moves the data in the cb. Prior to tcp_v4_rcv (e.g., early demux) the
ingress index is obtained from IPCB using inet_sdif and after tcp_v4_rcv
tcp_v4_sdif is used.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_hashtables.h | 22 +++++++++++++---------
 include/net/tcp.h              | 10 ++++++++++
 net/dccp/ipv6.c                |  4 ++--
 net/ipv6/inet6_hashtables.c    | 28 +++++++++++++++++-----------
 net/ipv6/tcp_ipv6.c            | 13 ++++++++-----
 net/ipv6/udp.c                 |  7 ++++---
 net/netfilter/xt_TPROXY.c      |  4 ++--
 7 files changed, 56 insertions(+), 32 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index b87becacd9d3..6e91e38a31da 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -49,7 +49,8 @@ struct sock *__inet6_lookup_established(struct net *net,
 					const struct in6_addr *saddr,
 					const __be16 sport,
 					const struct in6_addr *daddr,
-					const u16 hnum, const int dif);
+					const u16 hnum, const int dif,
+					const int sdif);
 
 struct sock *inet6_lookup_listener(struct net *net,
 				   struct inet_hashinfo *hashinfo,
@@ -57,7 +58,8 @@ struct sock *inet6_lookup_listener(struct net *net,
 				   const struct in6_addr *saddr,
 				   const __be16 sport,
 				   const struct in6_addr *daddr,
-				   const unsigned short hnum, const int dif);
+				   const unsigned short hnum,
+				   const int dif, const int sdif);
 
 static inline struct sock *__inet6_lookup(struct net *net,
 					  struct inet_hashinfo *hashinfo,
@@ -66,24 +68,25 @@ static inline struct sock *__inet6_lookup(struct net *net,
 					  const __be16 sport,
 					  const struct in6_addr *daddr,
 					  const u16 hnum,
-					  const int dif,
+					  const int dif, const int sdif,
 					  bool *refcounted)
 {
 	struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
-						sport, daddr, hnum, dif);
+						     sport, daddr, hnum,
+						     dif, sdif);
 	*refcounted = true;
 	if (sk)
 		return sk;
 	*refcounted = false;
 	return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
-				     daddr, hnum, dif);
+				     daddr, hnum, dif, sdif);
 }
 
 static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
 					      struct sk_buff *skb, int doff,
 					      const __be16 sport,
 					      const __be16 dport,
-					      int iif,
+					      int iif, int sdif,
 					      bool *refcounted)
 {
 	struct sock *sk = skb_steal_sock(skb);
@@ -95,7 +98,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
 	return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
 			      doff, &ipv6_hdr(skb)->saddr, sport,
 			      &ipv6_hdr(skb)->daddr, ntohs(dport),
-			      iif, refcounted);
+			      iif, sdif, refcounted);
 }
 
 struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
@@ -107,13 +110,14 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
 int inet6_hash(struct sock *sk);
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
-#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	\
+#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \
 	(((__sk)->sk_portpair == (__ports))			&&	\
 	 ((__sk)->sk_family == AF_INET6)			&&	\
 	 ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr))		&&	\
 	 ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr))	&&	\
 	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif))) 		&&	\
+	   ((__sk)->sk_bound_dev_if == (__dif))	||			\
+	   ((__sk)->sk_bound_dev_if == (__sdif)))		&&	\
 	 net_eq(sock_net(__sk), (__net)))
 
 #endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2b89f1ab8552..999f3efe572b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -827,6 +827,16 @@ static inline int tcp_v6_iif(const struct sk_buff *skb)
 
 	return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
 }
+
+/* TCP_SKB_CB reference means this can not be used from early demux */
+static inline int tcp_v6_sdif(const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+	if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags))
+		return TCP_SKB_CB(skb)->header.h6.iif;
+#endif
+	return 0;
+}
 #endif
 
 /* TCP_SKB_CB reference means this can not be used from early demux */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1b58eac8aad3..47a7b59b355e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -89,7 +89,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	sk = __inet6_lookup_established(net, &dccp_hashinfo,
 					&hdr->daddr, dh->dccph_dport,
 					&hdr->saddr, ntohs(dh->dccph_sport),
-					inet6_iif(skb));
+					inet6_iif(skb), 0);
 
 	if (!sk) {
 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
@@ -687,7 +687,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
 lookup:
 	sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
 			        dh->dccph_sport, dh->dccph_dport,
-				inet6_iif(skb), &refcounted);
+				inet6_iif(skb), 0, &refcounted);
 	if (!sk) {
 		dccp_pr_debug("failed to look up flow ID in table and "
 			      "get corresponding socket\n");
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b13b8f93079d..b01858f5deb1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -56,7 +56,7 @@ struct sock *__inet6_lookup_established(struct net *net,
 					   const __be16 sport,
 					   const struct in6_addr *daddr,
 					   const u16 hnum,
-					   const int dif)
+					   const int dif, const int sdif)
 {
 	struct sock *sk;
 	const struct hlist_nulls_node *node;
@@ -73,12 +73,12 @@ begin:
 	sk_nulls_for_each_rcu(sk, node, &head->chain) {
 		if (sk->sk_hash != hash)
 			continue;
-		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))
 			continue;
 		if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
 			goto out;
 
-		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) {
 			sock_gen_put(sk);
 			goto begin;
 		}
@@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established);
 static inline int compute_score(struct sock *sk, struct net *net,
 				const unsigned short hnum,
 				const struct in6_addr *daddr,
-				const int dif, bool exact_dif)
+				const int dif, const int sdif, bool exact_dif)
 {
 	int score = -1;
 
@@ -110,9 +110,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
 			score++;
 		}
 		if (sk->sk_bound_dev_if || exact_dif) {
-			if (sk->sk_bound_dev_if != dif)
+			bool dev_match = (sk->sk_bound_dev_if == dif ||
+					  sk->sk_bound_dev_if == sdif);
+
+			if (exact_dif && !dev_match)
 				return -1;
-			score++;
+			if (sk->sk_bound_dev_if && dev_match)
+				score++;
 		}
 		if (sk->sk_incoming_cpu == raw_smp_processor_id())
 			score++;
@@ -126,7 +130,7 @@ struct sock *inet6_lookup_listener(struct net *net,
 		struct sk_buff *skb, int doff,
 		const struct in6_addr *saddr,
 		const __be16 sport, const struct in6_addr *daddr,
-		const unsigned short hnum, const int dif)
+		const unsigned short hnum, const int dif, const int sdif)
 {
 	unsigned int hash = inet_lhashfn(net, hnum);
 	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -136,7 +140,7 @@ struct sock *inet6_lookup_listener(struct net *net,
 	u32 phash = 0;
 
 	sk_for_each(sk, &ilb->head) {
-		score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+		score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
 		if (score > hiscore) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -171,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
 	bool refcounted;
 
 	sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
-			    ntohs(dport), dif, &refcounted);
+			    ntohs(dport), dif, 0, &refcounted);
 	if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
@@ -187,8 +191,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
 	const struct in6_addr *saddr = &sk->sk_v6_daddr;
 	const int dif = sk->sk_bound_dev_if;
-	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	struct net *net = sock_net(sk);
+	const int sdif = l3mdev_master_ifindex_by_index(net, dif);
+	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
 						inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -203,7 +208,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 		if (sk2->sk_hash != hash)
 			continue;
 
-		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
+				       dif, sdif))) {
 			if (sk2->sk_state == TCP_TIME_WAIT) {
 				tw = inet_twsk(sk2);
 				if (twsk_unique(sk, sk2, twp))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ced5dcf37465..f776ec4ecf6d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -350,7 +350,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
 					&hdr->daddr, th->dest,
 					&hdr->saddr, ntohs(th->source),
-					skb->dev->ifindex);
+					skb->dev->ifindex, inet6_sdif(skb));
 
 	if (!sk) {
 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
@@ -918,7 +918,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 					   &tcp_hashinfo, NULL, 0,
 					   &ipv6h->saddr,
 					   th->source, &ipv6h->daddr,
-					   ntohs(th->source), tcp_v6_iif(skb));
+					   ntohs(th->source), tcp_v6_iif(skb),
+					   tcp_v6_sdif(skb));
 		if (!sk1)
 			goto out;
 
@@ -1397,6 +1398,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
 
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
+	int sdif = inet6_sdif(skb);
 	const struct tcphdr *th;
 	const struct ipv6hdr *hdr;
 	bool refcounted;
@@ -1430,7 +1432,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 
 lookup:
 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
-				th->source, th->dest, inet6_iif(skb),
+				th->source, th->dest, inet6_iif(skb), sdif,
 				&refcounted);
 	if (!sk)
 		goto no_tcp_socket;
@@ -1563,7 +1565,8 @@ do_time_wait:
 					    skb, __tcp_hdrlen(th),
 					    &ipv6_hdr(skb)->saddr, th->source,
 					    &ipv6_hdr(skb)->daddr,
-					    ntohs(th->dest), tcp_v6_iif(skb));
+					    ntohs(th->dest), tcp_v6_iif(skb),
+					    sdif);
 		if (sk2) {
 			struct inet_timewait_sock *tw = inet_twsk(sk);
 			inet_twsk_deschedule_put(tw);
@@ -1610,7 +1613,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
 					&hdr->saddr, th->source,
 					&hdr->daddr, ntohs(th->dest),
-					inet6_iif(skb));
+					inet6_iif(skb), inet6_sdif(skb));
 	if (sk) {
 		skb->sk = sk;
 		skb->destructor = sock_edemux;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d96a877798a7..19afcaf4a22e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -897,7 +897,7 @@ discard:
 static struct sock *__udp6_lib_demux_lookup(struct net *net,
 			__be16 loc_port, const struct in6_addr *loc_addr,
 			__be16 rmt_port, const struct in6_addr *rmt_addr,
-			int dif)
+			int dif, int sdif)
 {
 	unsigned short hnum = ntohs(loc_port);
 	unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
@@ -908,7 +908,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
 
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+		    INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
 			return sk;
 		/* Only check first socket in chain */
 		break;
@@ -923,6 +923,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
 	struct sock *sk;
 	struct dst_entry *dst;
 	int dif = skb->dev->ifindex;
+	int sdif = inet6_sdif(skb);
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) +
 	    sizeof(struct udphdr)))
@@ -934,7 +935,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
 		sk = __udp6_lib_demux_lookup(net, uh->dest,
 					     &ipv6_hdr(skb)->daddr,
 					     uh->source, &ipv6_hdr(skb)->saddr,
-					     dif);
+					     dif, sdif);
 	else
 		return;
 
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 94fb0fd0c667..ade4c10c28c6 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -195,7 +195,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
 						   thoff + __tcp_hdrlen(tcph),
 						   saddr, sport,
 						   daddr, ntohs(dport),
-						   in->ifindex);
+						   in->ifindex, 0);
 
 			if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 				sk = NULL;
@@ -208,7 +208,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
 		case NFT_LOOKUP_ESTABLISHED:
 			sk = __inet6_lookup_established(net, &tcp_hashinfo,
 							saddr, sport, daddr, ntohs(dport),
-							in->ifindex);
+							in->ifindex, 0);
 			break;
 		default:
 			BUG();
-- 
cgit v1.2.3


From 5108ab4bf446fa9ad2c71f5fc1d839067b72636f Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 7 Aug 2017 08:44:22 -0700
Subject: net: ipv6: add second dif to raw socket lookups

Add a second device index, sdif, to raw socket lookups. sdif is the
index for ingress devices enslaved to an l3mdev. It allows the lookups
to consider the enslaved device as well as the L3 domain when searching
for a socket.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rawv6.h |  2 +-
 net/ipv4/raw_diag.c |  2 +-
 net/ipv6/raw.c      | 13 ++++++++-----
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index cbe4e9de1894..4addc5c988e0 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -6,7 +6,7 @@
 extern struct raw_hashinfo raw_v6_hashinfo;
 struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 			     unsigned short num, const struct in6_addr *loc_addr,
-			     const struct in6_addr *rmt_addr, int dif);
+			     const struct in6_addr *rmt_addr, int dif, int sdif);
 
 int raw_abort(struct sock *sk, int err);
 
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index c600d3c71d4d..c200065ef9a5 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -52,7 +52,7 @@ static struct sock *raw_lookup(struct net *net, struct sock *from,
 		sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
 				     (const struct in6_addr *)r->id.idiag_src,
 				     (const struct in6_addr *)r->id.idiag_dst,
-				     r->id.idiag_if);
+				     r->id.idiag_if, 0);
 #endif
 	return sk;
 }
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 60be012fe708..e4462b0ff801 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
 
 struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 		unsigned short num, const struct in6_addr *loc_addr,
-		const struct in6_addr *rmt_addr, int dif)
+		const struct in6_addr *rmt_addr, int dif, int sdif)
 {
 	bool is_multicast = ipv6_addr_is_multicast(loc_addr);
 
@@ -86,7 +86,9 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
-			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+			if (sk->sk_bound_dev_if &&
+			    sk->sk_bound_dev_if != dif &&
+			    sk->sk_bound_dev_if != sdif)
 				continue;
 
 			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
@@ -178,7 +180,8 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 		goto out;
 
 	net = dev_net(skb->dev);
-	sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb));
+	sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr,
+			     inet6_iif(skb), inet6_sdif(skb));
 
 	while (sk) {
 		int filtered;
@@ -222,7 +225,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 			}
 		}
 		sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
-				     inet6_iif(skb));
+				     inet6_iif(skb), inet6_sdif(skb));
 	}
 out:
 	read_unlock(&raw_v6_hashinfo.lock);
@@ -378,7 +381,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
 		net = dev_net(skb->dev);
 
 		while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
-						inet6_iif(skb)))) {
+					     inet6_iif(skb), inet6_iif(skb)))) {
 			rawv6_err(sk, skb, NULL, type, code,
 					inner_offset, info);
 			sk = sk_next(sk);
-- 
cgit v1.2.3


From 8113c095672f6504b23eba6edf4a57b5f7f744af Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Fri, 4 Aug 2017 21:31:43 -0700
Subject: net_sched: use void pointer for filter handle

Now we use 'unsigned long fh' as a pointer in every place,
it is safe to convert it to a void pointer now. This gets
rid of many casts to pointer.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     |  2 +-
 include/net/sch_generic.h |  8 ++++----
 net/sched/cls_api.c       | 17 ++++++++---------
 net/sched/cls_basic.c     | 22 ++++++++++------------
 net/sched/cls_bpf.c       | 27 ++++++++++++---------------
 net/sched/cls_cgroup.c    | 12 ++++++------
 net/sched/cls_flow.c      | 24 ++++++++++++------------
 net/sched/cls_flower.c    | 22 +++++++++++-----------
 net/sched/cls_fw.c        | 26 +++++++++++++-------------
 net/sched/cls_matchall.c  | 16 ++++++++--------
 net/sched/cls_route.c     | 26 +++++++++++++-------------
 net/sched/cls_rsvp.h      | 24 ++++++++++++------------
 net/sched/cls_tcindex.c   | 36 ++++++++++++++++--------------------
 net/sched/cls_u32.c       | 30 +++++++++++++++---------------
 14 files changed, 141 insertions(+), 151 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 572083af02ac..0f78e6560b2d 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -11,7 +11,7 @@ struct tcf_walker {
 	int	stop;
 	int	skip;
 	int	count;
-	int	(*fn)(struct tcf_proto *, unsigned long node, struct tcf_walker *);
+	int	(*fn)(struct tcf_proto *, void *node, struct tcf_walker *);
 };
 
 int register_tcf_proto_ops(struct tcf_proto_ops *ops);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1c123e2b2415..e79f5ad1c5f3 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -213,16 +213,16 @@ struct tcf_proto_ops {
 	int			(*init)(struct tcf_proto*);
 	void			(*destroy)(struct tcf_proto*);
 
-	unsigned long		(*get)(struct tcf_proto*, u32 handle);
+	void*			(*get)(struct tcf_proto*, u32 handle);
 	int			(*change)(struct net *net, struct sk_buff *,
 					struct tcf_proto*, unsigned long,
 					u32 handle, struct nlattr **,
-					unsigned long *, bool);
-	int			(*delete)(struct tcf_proto*, unsigned long, bool*);
+					void **, bool);
+	int			(*delete)(struct tcf_proto*, void *, bool*);
 	void			(*walk)(struct tcf_proto*, struct tcf_walker *arg);
 
 	/* rtnetlink specific */
-	int			(*dump)(struct net*, struct tcf_proto*, unsigned long,
+	int			(*dump)(struct net*, struct tcf_proto*, void *,
 					struct sk_buff *skb, struct tcmsg*);
 
 	struct module		*owner;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index afd099727aea..668afb6e9885 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -102,11 +102,11 @@ EXPORT_SYMBOL(unregister_tcf_proto_ops);
 
 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 			  struct nlmsghdr *n, struct tcf_proto *tp,
-			  unsigned long fh, int event, bool unicast);
+			  void *fh, int event, bool unicast);
 
 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 			      struct nlmsghdr *n, struct tcf_proto *tp,
-			      unsigned long fh, bool unicast, bool *last);
+			      void *fh, bool unicast, bool *last);
 
 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
 				 struct nlmsghdr *n,
@@ -432,7 +432,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	struct tcf_proto *tp;
 	const struct Qdisc_class_ops *cops;
 	unsigned long cl;
-	unsigned long fh;
+	void *fh;
 	int err;
 	int tp_created;
 
@@ -571,7 +571,7 @@ replay:
 
 	fh = tp->ops->get(tp, t->tcm_handle);
 
-	if (fh == 0) {
+	if (!fh) {
 		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
 			tcf_chain_tp_remove(chain, &chain_info, tp);
 			tfilter_notify(net, skb, n, tp, fh,
@@ -641,7 +641,7 @@ errout:
 }
 
 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
-			 struct tcf_proto *tp, unsigned long fh, u32 portid,
+			 struct tcf_proto *tp, void *fh, u32 portid,
 			 u32 seq, u16 flags, int event)
 {
 	struct tcmsg *tcm;
@@ -679,7 +679,7 @@ nla_put_failure:
 
 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 			  struct nlmsghdr *n, struct tcf_proto *tp,
-			  unsigned long fh, int event, bool unicast)
+			  void *fh, int event, bool unicast)
 {
 	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -703,7 +703,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 
 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 			      struct nlmsghdr *n, struct tcf_proto *tp,
-			      unsigned long fh, bool unicast, bool *last)
+			      void *fh, bool unicast, bool *last)
 {
 	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -738,8 +738,7 @@ struct tcf_dump_args {
 	struct netlink_callback *cb;
 };
 
-static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
-			 struct tcf_walker *arg)
+static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
 {
 	struct tcf_dump_args *a = (void *)arg;
 	struct net *net = sock_net(a->skb->sk);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 7c7a82138f76..73cc7f167a38 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -56,20 +56,18 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	return -1;
 }
 
-static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
+static void *basic_get(struct tcf_proto *tp, u32 handle)
 {
-	unsigned long l = 0UL;
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct basic_filter *f;
 
 	list_for_each_entry(f, &head->flist, link) {
 		if (f->handle == handle) {
-			l = (unsigned long) f;
-			break;
+			return f;
 		}
 	}
 
-	return l;
+	return NULL;
 }
 
 static int basic_init(struct tcf_proto *tp)
@@ -106,10 +104,10 @@ static void basic_destroy(struct tcf_proto *tp)
 	kfree_rcu(head, rcu);
 }
 
-static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
-	struct basic_filter *f = (struct basic_filter *) arg;
+	struct basic_filter *f = arg;
 
 	list_del_rcu(&f->link);
 	tcf_unbind_filter(tp, &f->res);
@@ -149,7 +147,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 
 static int basic_change(struct net *net, struct sk_buff *in_skb,
 			struct tcf_proto *tp, unsigned long base, u32 handle,
-			struct nlattr **tca, unsigned long *arg, bool ovr)
+			struct nlattr **tca, void **arg, bool ovr)
 {
 	int err;
 	struct basic_head *head = rtnl_dereference(tp->root);
@@ -202,7 +200,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto errout;
 
-	*arg = (unsigned long)fnew;
+	*arg = fnew;
 
 	if (fold) {
 		list_replace_rcu(&fold->link, &fnew->link);
@@ -228,7 +226,7 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 		if (arg->count < arg->skip)
 			goto skip;
 
-		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+		if (arg->fn(tp, f, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
@@ -237,10 +235,10 @@ skip:
 	}
 }
 
-static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		      struct sk_buff *skb, struct tcmsg *t)
 {
-	struct basic_filter *f = (struct basic_filter *) fh;
+	struct basic_filter *f = fh;
 	struct nlattr *nest;
 
 	if (f == NULL)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 2d4d06e41cd9..db17b68df94e 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -270,11 +270,11 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
 	call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
 }
 
-static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 
-	__cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
+	__cls_bpf_delete(tp, arg);
 	*last = list_empty(&head->plist);
 	return 0;
 }
@@ -290,20 +290,17 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
 	kfree_rcu(head, rcu);
 }
 
-static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
+static void *cls_bpf_get(struct tcf_proto *tp, u32 handle)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 	struct cls_bpf_prog *prog;
-	unsigned long ret = 0UL;
 
 	list_for_each_entry(prog, &head->plist, link) {
-		if (prog->handle == handle) {
-			ret = (unsigned long) prog;
-			break;
-		}
+		if (prog->handle == handle)
+			return prog;
 	}
 
-	return ret;
+	return NULL;
 }
 
 static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
@@ -448,10 +445,10 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 			  struct tcf_proto *tp, unsigned long base,
 			  u32 handle, struct nlattr **tca,
-			  unsigned long *arg, bool ovr)
+			  void **arg, bool ovr)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
-	struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
+	struct cls_bpf_prog *oldprog = *arg;
 	struct nlattr *tb[TCA_BPF_MAX + 1];
 	struct cls_bpf_prog *prog;
 	int ret;
@@ -509,7 +506,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 		list_add_rcu(&prog->link, &head->plist);
 	}
 
-	*arg = (unsigned long) prog;
+	*arg = prog;
 	return 0;
 
 errout:
@@ -557,10 +554,10 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
 	return 0;
 }
 
-static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			struct sk_buff *skb, struct tcmsg *tm)
 {
-	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
+	struct cls_bpf_prog *prog = fh;
 	struct nlattr *nest;
 	u32 bpf_flags = 0;
 	int ret;
@@ -618,7 +615,7 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	list_for_each_entry(prog, &head->plist, link) {
 		if (arg->count < arg->skip)
 			goto skip;
-		if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
+		if (arg->fn(tp, prog, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index df7a582775df..d48452f87975 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -43,9 +43,9 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	return tcf_exts_exec(skb, &head->exts, res);
 }
 
-static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
+static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle)
 {
-	return 0UL;
+	return NULL;
 }
 
 static int cls_cgroup_init(struct tcf_proto *tp)
@@ -71,7 +71,7 @@ static void cls_cgroup_destroy_rcu(struct rcu_head *root)
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 			     struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
-			     unsigned long *arg, bool ovr)
+			     void **arg, bool ovr)
 {
 	struct nlattr *tb[TCA_CGROUP_MAX + 1];
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -128,7 +128,7 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
 		call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
 }
 
-static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	return -EOPNOTSUPP;
 }
@@ -140,7 +140,7 @@ static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	if (arg->count < arg->skip)
 		goto skip;
 
-	if (arg->fn(tp, (unsigned long) head, arg) < 0) {
+	if (arg->fn(tp, head, arg) < 0) {
 		arg->stop = 1;
 		return;
 	}
@@ -148,7 +148,7 @@ skip:
 	arg->count++;
 }
 
-static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			   struct sk_buff *skb, struct tcmsg *t)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 55e281b20140..2a3a60ec5b86 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -382,7 +382,7 @@ static void flow_destroy_filter(struct rcu_head *head)
 static int flow_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       unsigned long *arg, bool ovr)
+		       void **arg, bool ovr)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *fold, *fnew;
@@ -439,7 +439,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto err2;
 
-	fold = (struct flow_filter *)*arg;
+	fold = *arg;
 	if (fold) {
 		err = -EINVAL;
 		if (fold->handle != handle && handle)
@@ -532,12 +532,12 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	if (perturb_period)
 		mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
 
-	if (*arg == 0)
+	if (!*arg)
 		list_add_tail_rcu(&fnew->list, &head->filters);
 	else
 		list_replace_rcu(&fold->list, &fnew->list);
 
-	*arg = (unsigned long)fnew;
+	*arg = fnew;
 
 	if (fold)
 		call_rcu(&fold->rcu, flow_destroy_filter);
@@ -551,10 +551,10 @@ err1:
 	return err;
 }
 
-static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
-	struct flow_filter *f = (struct flow_filter *)arg;
+	struct flow_filter *f = arg;
 
 	list_del_rcu(&f->list);
 	call_rcu(&f->rcu, flow_destroy_filter);
@@ -586,21 +586,21 @@ static void flow_destroy(struct tcf_proto *tp)
 	kfree_rcu(head, rcu);
 }
 
-static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
+static void *flow_get(struct tcf_proto *tp, u32 handle)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *f;
 
 	list_for_each_entry(f, &head->filters, list)
 		if (f->handle == handle)
-			return (unsigned long)f;
-	return 0;
+			return f;
+	return NULL;
 }
 
-static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct flow_filter *f = (struct flow_filter *)fh;
+	struct flow_filter *f = fh;
 	struct nlattr *nest;
 
 	if (f == NULL)
@@ -666,7 +666,7 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	list_for_each_entry(f, &head->filters, list) {
 		if (arg->count < arg->skip)
 			goto skip;
-		if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+		if (arg->fn(tp, f, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1474bacf4df4..d2551a03c542 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -332,15 +332,15 @@ static void fl_destroy(struct tcf_proto *tp)
 	call_rcu(&head->rcu, fl_destroy_rcu);
 }
 
-static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+static void *fl_get(struct tcf_proto *tp, u32 handle)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct cls_fl_filter *f;
 
 	list_for_each_entry(f, &head->filters, list)
 		if (f->handle == handle)
-			return (unsigned long) f;
-	return 0;
+			return f;
+	return NULL;
 }
 
 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -883,10 +883,10 @@ static u32 fl_grab_new_handle(struct tcf_proto *tp,
 static int fl_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle, struct nlattr **tca,
-		     unsigned long *arg, bool ovr)
+		     void **arg, bool ovr)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
-	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+	struct cls_fl_filter *fold = *arg;
 	struct cls_fl_filter *fnew;
 	struct nlattr **tb;
 	struct fl_flow_mask mask = {};
@@ -977,7 +977,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 			fl_hw_destroy_filter(tp, fold);
 	}
 
-	*arg = (unsigned long) fnew;
+	*arg = fnew;
 
 	if (fold) {
 		list_replace_rcu(&fold->list, &fnew->list);
@@ -998,10 +998,10 @@ errout_tb:
 	return err;
 }
 
-static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int fl_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
-	struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+	struct cls_fl_filter *f = arg;
 
 	if (!tc_skip_sw(f->flags))
 		rhashtable_remove_fast(&head->ht, &f->ht_node,
@@ -1019,7 +1019,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	list_for_each_entry_rcu(f, &head->filters, list) {
 		if (arg->count < arg->skip)
 			goto skip;
-		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+		if (arg->fn(tp, f, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
@@ -1154,11 +1154,11 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
 	return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
 }
 
-static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
-	struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+	struct cls_fl_filter *f = fh;
 	struct nlattr *nest;
 	struct fl_flow_key *key, *mask;
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 11f178f1b2be..192255ec50bd 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -95,20 +95,20 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	return -1;
 }
 
-static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
+static void *fw_get(struct tcf_proto *tp, u32 handle)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f;
 
 	if (head == NULL)
-		return 0;
+		return NULL;
 
 	f = rtnl_dereference(head->ht[fw_hash(handle)]);
 	for (; f; f = rtnl_dereference(f->next)) {
 		if (f->id == handle)
-			return (unsigned long)f;
+			return f;
 	}
-	return 0;
+	return NULL;
 }
 
 static int fw_init(struct tcf_proto *tp)
@@ -147,10 +147,10 @@ static void fw_destroy(struct tcf_proto *tp)
 	kfree_rcu(head, rcu);
 }
 
-static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
-	struct fw_filter *f = (struct fw_filter *)arg;
+	struct fw_filter *f = arg;
 	struct fw_filter __rcu **fp;
 	struct fw_filter *pfp;
 	int ret = -EINVAL;
@@ -230,11 +230,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 
 static int fw_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
-		     u32 handle, struct nlattr **tca, unsigned long *arg,
+		     u32 handle, struct nlattr **tca, void **arg,
 		     bool ovr)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
-	struct fw_filter *f = (struct fw_filter *) *arg;
+	struct fw_filter *f = *arg;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_FW_MAX + 1];
 	int err;
@@ -288,7 +288,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 		tcf_unbind_filter(tp, &f->res);
 		call_rcu(&f->rcu, fw_delete_filter);
 
-		*arg = (unsigned long)fnew;
+		*arg = fnew;
 		return err;
 	}
 
@@ -325,7 +325,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 	RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]);
 	rcu_assign_pointer(head->ht[fw_hash(handle)], f);
 
-	*arg = (unsigned long)f;
+	*arg = f;
 	return 0;
 
 errout:
@@ -354,7 +354,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 				arg->count++;
 				continue;
 			}
-			if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+			if (arg->fn(tp, f, arg) < 0) {
 				arg->stop = 1;
 				return;
 			}
@@ -363,11 +363,11 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
-static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
-	struct fw_filter *f = (struct fw_filter *)fh;
+	struct fw_filter *f = fh;
 	struct nlattr *nest;
 
 	if (f == NULL)
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index c9f6500b8080..d44e26fdae84 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -98,9 +98,9 @@ static void mall_destroy(struct tcf_proto *tp)
 	call_rcu(&head->rcu, mall_destroy_rcu);
 }
 
-static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
+static void *mall_get(struct tcf_proto *tp, u32 handle)
 {
-	return 0UL;
+	return NULL;
 }
 
 static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
@@ -129,7 +129,7 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 static int mall_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       unsigned long *arg, bool ovr)
+		       void **arg, bool ovr)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
 	struct net_device *dev = tp->q->dev_queue->dev;
@@ -185,7 +185,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 	if (!tc_in_hw(new->flags))
 		new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
-	*arg = (unsigned long) head;
+	*arg = head;
 	rcu_assign_pointer(tp->root, new);
 	return 0;
 
@@ -197,7 +197,7 @@ err_exts_init:
 	return err;
 }
 
-static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int mall_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	return -EOPNOTSUPP;
 }
@@ -208,16 +208,16 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 
 	if (arg->count < arg->skip)
 		goto skip;
-	if (arg->fn(tp, (unsigned long) head, arg) < 0)
+	if (arg->fn(tp, head, arg) < 0)
 		arg->stop = 1;
 skip:
 	arg->count++;
 }
 
-static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct cls_mall_head *head = (struct cls_mall_head *) fh;
+	struct cls_mall_head *head = fh;
 	struct nlattr *nest;
 
 	if (!head)
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index f1e7d7850b44..3b70982394ce 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -216,7 +216,7 @@ static inline u32 from_hash(u32 id)
 	return 16 + (id & 0xF);
 }
 
-static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
+static void *route4_get(struct tcf_proto *tp, u32 handle)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_bucket *b;
@@ -225,11 +225,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
 
 	h1 = to_hash(handle);
 	if (h1 > 256)
-		return 0;
+		return NULL;
 
 	h2 = from_hash(handle >> 16);
 	if (h2 > 32)
-		return 0;
+		return NULL;
 
 	b = rtnl_dereference(head->table[h1]);
 	if (b) {
@@ -237,9 +237,9 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
 		     f;
 		     f = rtnl_dereference(f->next))
 			if (f->handle == handle)
-				return (unsigned long)f;
+				return f;
 	}
-	return 0;
+	return NULL;
 }
 
 static int route4_init(struct tcf_proto *tp)
@@ -294,10 +294,10 @@ static void route4_destroy(struct tcf_proto *tp)
 	kfree_rcu(head, rcu);
 }
 
-static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
-	struct route4_filter *f = (struct route4_filter *)arg;
+	struct route4_filter *f = arg;
 	struct route4_filter __rcu **fp;
 	struct route4_filter *nf;
 	struct route4_bucket *b;
@@ -448,7 +448,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 
 static int route4_change(struct net *net, struct sk_buff *in_skb,
 			 struct tcf_proto *tp, unsigned long base, u32 handle,
-			 struct nlattr **tca, unsigned long *arg, bool ovr)
+			 struct nlattr **tca, void **arg, bool ovr)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_filter __rcu **fp;
@@ -467,7 +467,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		return err;
 
-	fold = (struct route4_filter *)*arg;
+	fold = *arg;
 	if (fold && handle && fold->handle != handle)
 			return -EINVAL;
 
@@ -525,7 +525,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 	}
 
 	route4_reset_fastmap(head);
-	*arg = (unsigned long)f;
+	*arg = f;
 	if (fold) {
 		tcf_unbind_filter(tp, &fold->res);
 		call_rcu(&fold->rcu, route4_delete_filter);
@@ -564,7 +564,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 						arg->count++;
 						continue;
 					}
-					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+					if (arg->fn(tp, f, arg) < 0) {
 						arg->stop = 1;
 						return;
 					}
@@ -575,10 +575,10 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
-static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		       struct sk_buff *skb, struct tcmsg *t)
 {
-	struct route4_filter *f = (struct route4_filter *)fh;
+	struct route4_filter *f = fh;
 	struct nlattr *nest;
 	u32 id;
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 4adb67a73491..26203ff817f3 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -248,7 +248,7 @@ static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
 	BUG_ON(1);
 }
 
-static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
+static void *rsvp_get(struct tcf_proto *tp, u32 handle)
 {
 	struct rsvp_head *head = rtnl_dereference(tp->root);
 	struct rsvp_session *s;
@@ -257,17 +257,17 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 	unsigned int h2 = (handle >> 8) & 0xFF;
 
 	if (h2 > 16)
-		return 0;
+		return NULL;
 
 	for (s = rtnl_dereference(head->ht[h1]); s;
 	     s = rtnl_dereference(s->next)) {
 		for (f = rtnl_dereference(s->ht[h2]); f;
 		     f = rtnl_dereference(f->next)) {
 			if (f->handle == handle)
-				return (unsigned long)f;
+				return f;
 		}
 	}
-	return 0;
+	return NULL;
 }
 
 static int rsvp_init(struct tcf_proto *tp)
@@ -328,10 +328,10 @@ static void rsvp_destroy(struct tcf_proto *tp)
 	kfree_rcu(data, rcu);
 }
 
-static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct rsvp_head *head = rtnl_dereference(tp->root);
-	struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
+	struct rsvp_filter *nfp, *f = arg;
 	struct rsvp_filter __rcu **fp;
 	unsigned int h = f->handle;
 	struct rsvp_session __rcu **sp;
@@ -464,7 +464,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
-		       unsigned long *arg, bool ovr)
+		       void **arg, bool ovr)
 {
 	struct rsvp_head *data = rtnl_dereference(tp->root);
 	struct rsvp_filter *f, *nfp;
@@ -493,7 +493,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto errout2;
 
-	f = (struct rsvp_filter *)*arg;
+	f = *arg;
 	if (f) {
 		/* Node exists: adjust only classid */
 		struct rsvp_filter *n;
@@ -604,7 +604,7 @@ insert:
 			RCU_INIT_POINTER(f->next, nfp);
 			rcu_assign_pointer(*fp, f);
 
-			*arg = (unsigned long)f;
+			*arg = f;
 			return 0;
 		}
 	}
@@ -663,7 +663,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 						arg->count++;
 						continue;
 					}
-					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+					if (arg->fn(tp, f, arg) < 0) {
 						arg->stop = 1;
 						return;
 					}
@@ -674,10 +674,10 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct rsvp_filter *f = (struct rsvp_filter *)fh;
+	struct rsvp_filter *f = fh;
 	struct rsvp_session *s;
 	struct nlattr *nest;
 	struct tc_rsvp_pinfo pinfo;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index d69f828f3fed..fb281b9b2c52 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -104,16 +104,16 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 }
 
 
-static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
+static void *tcindex_get(struct tcf_proto *tp, u32 handle)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r;
 
 	pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
 	if (p->perfect && handle >= p->alloc_hash)
-		return 0;
+		return NULL;
 	r = tcindex_lookup(p, handle);
-	return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
+	return r && tcindex_filter_is_set(r) ? r : NULL;
 }
 
 static int tcindex_init(struct tcf_proto *tp)
@@ -150,14 +150,14 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
 	kfree(f);
 }
 
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+	struct tcindex_filter_result *r = arg;
 	struct tcindex_filter __rcu **walk;
 	struct tcindex_filter *f = NULL;
 
-	pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
+	pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
 	if (p->perfect) {
 		if (!r->res.class)
 			return -ENOENT;
@@ -192,8 +192,7 @@ found:
 }
 
 static int tcindex_destroy_element(struct tcf_proto *tp,
-				   unsigned long arg,
-				   struct tcf_walker *walker)
+				   void *arg, struct tcf_walker *walker)
 {
 	bool last;
 
@@ -471,17 +470,17 @@ errout:
 static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
 	       struct tcf_proto *tp, unsigned long base, u32 handle,
-	       struct nlattr **tca, unsigned long *arg, bool ovr)
+	       struct nlattr **tca, void **arg, bool ovr)
 {
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
 	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
+	struct tcindex_filter_result *r = *arg;
 	int err;
 
 	pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
-	    "p %p,r %p,*arg 0x%lx\n",
-	    tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
+	    "p %p,r %p,*arg %p\n",
+	    tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL);
 
 	if (!opt)
 		return 0;
@@ -506,9 +505,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 			if (!p->perfect[i].res.class)
 				continue;
 			if (walker->count >= walker->skip) {
-				if (walker->fn(tp,
-				    (unsigned long) (p->perfect+i), walker)
-				     < 0) {
+				if (walker->fn(tp, p->perfect + i, walker) < 0) {
 					walker->stop = 1;
 					return;
 				}
@@ -522,8 +519,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 		for (f = rtnl_dereference(p->h[i]); f; f = next) {
 			next = rtnl_dereference(f->next);
 			if (walker->count >= walker->skip) {
-				if (walker->fn(tp, (unsigned long) &f->result,
-				    walker) < 0) {
+				if (walker->fn(tp, &f->result, walker) < 0) {
 					walker->stop = 1;
 					return;
 				}
@@ -548,14 +544,14 @@ static void tcindex_destroy(struct tcf_proto *tp)
 }
 
 
-static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			struct sk_buff *skb, struct tcmsg *t)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
+	struct tcindex_filter_result *r = fh;
 	struct nlattr *nest;
 
-	pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p\n",
+	pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
 		 tp, fh, skb, t, p, r);
 	pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4ed51d347d0a..5a3f78181526 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -289,7 +289,7 @@ out:
 }
 
 
-static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
+static void *u32_get(struct tcf_proto *tp, u32 handle)
 {
 	struct tc_u_hnode *ht;
 	struct tc_u_common *tp_c = tp->data;
@@ -300,12 +300,12 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
 		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
 
 	if (!ht)
-		return 0;
+		return NULL;
 
 	if (TC_U32_KEY(handle) == 0)
-		return (unsigned long)ht;
+		return ht;
 
-	return (unsigned long)u32_lookup_key(ht, handle);
+	return u32_lookup_key(ht, handle);
 }
 
 static u32 gen_new_htid(struct tc_u_common *tp_c)
@@ -605,9 +605,9 @@ static void u32_destroy(struct tcf_proto *tp)
 	tp->data = NULL;
 }
 
-static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
-	struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
+	struct tc_u_hnode *ht = arg;
 	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
 	struct tc_u_common *tp_c = tp->data;
 	int ret = 0;
@@ -831,7 +831,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
 
 static int u32_change(struct net *net, struct sk_buff *in_skb,
 		      struct tcf_proto *tp, unsigned long base, u32 handle,
-		      struct nlattr **tca, unsigned long *arg, bool ovr)
+		      struct nlattr **tca, void **arg, bool ovr)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
@@ -858,7 +858,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 			return -EINVAL;
 	}
 
-	n = (struct tc_u_knode *)*arg;
+	n = *arg;
 	if (n) {
 		struct tc_u_knode *new;
 
@@ -925,7 +925,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 
 		RCU_INIT_POINTER(ht->next, tp_c->hlist);
 		rcu_assign_pointer(tp_c->hlist, ht);
-		*arg = (unsigned long)ht;
+		*arg = ht;
 
 		return 0;
 	}
@@ -1020,7 +1020,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 
 		RCU_INIT_POINTER(n->next, pins);
 		rcu_assign_pointer(*ins, n);
-		*arg = (unsigned long)n;
+		*arg = n;
 		return 0;
 	}
 
@@ -1054,7 +1054,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 		if (ht->prio != tp->prio)
 			continue;
 		if (arg->count >= arg->skip) {
-			if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
+			if (arg->fn(tp, ht, arg) < 0) {
 				arg->stop = 1;
 				return;
 			}
@@ -1068,7 +1068,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 					arg->count++;
 					continue;
 				}
-				if (arg->fn(tp, (unsigned long)n, arg) < 0) {
+				if (arg->fn(tp, n, arg) < 0) {
 					arg->stop = 1;
 					return;
 				}
@@ -1078,10 +1078,10 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
-static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		    struct sk_buff *skb, struct tcmsg *t)
 {
-	struct tc_u_knode *n = (struct tc_u_knode *)fh;
+	struct tc_u_knode *n = fh;
 	struct tc_u_hnode *ht_up, *ht_down;
 	struct nlattr *nest;
 
@@ -1095,7 +1095,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 		goto nla_put_failure;
 
 	if (TC_U32_KEY(n->handle) == 0) {
-		struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
+		struct tc_u_hnode *ht = fh;
 		u32 divisor = ht->divisor + 1;
 
 		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
-- 
cgit v1.2.3


From b04c80d3a7e228cfb832cdb1c9ce8151f174669c Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Sat, 5 Aug 2017 12:38:25 +0200
Subject: ipv6: sr: export SRH insertion functions

This patch exports the seg6_do_srh_encap() and seg6_do_srh_inline()
functions. It also removes the CONFIG_IPV6_SEG6_INLINE knob
that enabled the compilation of seg6_do_srh_inline(). This function
is now built-in.

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/seg6.h       |  2 ++
 net/ipv6/Kconfig         | 12 ------------
 net/ipv6/seg6_iptunnel.c | 12 ++++--------
 3 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'include/net')

diff --git a/include/net/seg6.h b/include/net/seg6.h
index 4e0357517d79..a32abb040e1d 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -58,5 +58,7 @@ extern int seg6_iptunnel_init(void);
 extern void seg6_iptunnel_exit(void);
 
 extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
+extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
+extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
 
 #endif
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 48c452959d2c..50181a96923e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -315,18 +315,6 @@ config IPV6_SEG6_LWTUNNEL
 
 	  If unsure, say N.
 
-config IPV6_SEG6_INLINE
-	bool "IPv6: direct Segment Routing Header insertion "
-	depends on IPV6_SEG6_LWTUNNEL
-	---help---
-	  Support for direct insertion of the Segment Routing Header,
-	  also known as inline mode. Be aware that direct insertion of
-	  extension headers (as opposed to encapsulation) may break
-	  multiple mechanisms such as PMTUD or IPSec AH. Use this feature
-	  only if you know exactly what you are doing.
-
-	  If unsure, say N.
-
 config IPV6_SEG6_HMAC
 	bool "IPv6: Segment Routing HMAC support"
 	depends on IPV6
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 264d772d3c7d..501233040570 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev,
 }
 
 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 {
 	struct net *net = dev_net(skb_dst(skb)->dev);
 	struct ipv6hdr *hdr, *inner_hdr;
@@ -141,10 +141,10 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
 
 /* insert an SRH within an IPv6 packet, just after the IPv6 header */
-#ifdef CONFIG_IPV6_SEG6_INLINE
-static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 {
 	struct ipv6hdr *hdr, *oldhdr;
 	struct ipv6_sr_hdr *isrh;
@@ -193,7 +193,7 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 
 	return 0;
 }
-#endif
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
 
 static int seg6_do_srh(struct sk_buff *skb)
 {
@@ -209,12 +209,10 @@ static int seg6_do_srh(struct sk_buff *skb)
 	}
 
 	switch (tinfo->mode) {
-#ifdef CONFIG_IPV6_SEG6_INLINE
 	case SEG6_IPTUN_MODE_INLINE:
 		err = seg6_do_srh_inline(skb, tinfo->srh);
 		skb_reset_inner_headers(skb);
 		break;
-#endif
 	case SEG6_IPTUN_MODE_ENCAP:
 		err = seg6_do_srh_encap(skb, tinfo->srh);
 		break;
@@ -357,10 +355,8 @@ static int seg6_build_state(struct nlattr *nla,
 		return -EINVAL;
 
 	switch (tuninfo->mode) {
-#ifdef CONFIG_IPV6_SEG6_INLINE
 	case SEG6_IPTUN_MODE_INLINE:
 		break;
-#endif
 	case SEG6_IPTUN_MODE_ENCAP:
 		break;
 	default:
-- 
cgit v1.2.3


From d1df6fd8a1d22d37cffa0075ab8ad423ce656777 Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Sat, 5 Aug 2017 12:38:26 +0200
Subject: ipv6: sr: define core operations for seg6local lightweight tunnel

This patch implements a new type of lightweight tunnel named seg6local.
A seg6local lwt is defined by a type of action and a set of parameters.
The action represents the operation to perform on the packets matching the
lwt's route, and is not necessarily an encapsulation. The set of parameters
are arguments for the processing function.

Each action is defined in a struct seg6_action_desc within
seg6_action_table[]. This structure contains the action, mandatory
attributes, the processing function, and a static headroom size required by
the action. The mandatory attributes are encoded as a bitmask field. The
static headroom is set to a non-zero value when the processing function
always add a constant number of bytes to the skb (e.g. the header size for
encapsulations).

To facilitate rtnetlink-related operations such as parsing, fill_encap,
and cmp_encap, each type of action parameter is associated to three
function pointers, in seg6_action_params[].

All actions defined in seg6_local.h are detailed in [1].

[1] https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-01

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/seg6_local.h      |   6 +
 include/net/seg6.h              |   2 +
 include/uapi/linux/lwtunnel.h   |   1 +
 include/uapi/linux/seg6_local.h |  68 +++++++++
 net/core/lwtunnel.c             |   2 +
 net/ipv6/Kconfig                |   3 +-
 net/ipv6/Makefile               |   2 +-
 net/ipv6/seg6.c                 |   5 +
 net/ipv6/seg6_local.c           | 320 ++++++++++++++++++++++++++++++++++++++++
 9 files changed, 407 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/seg6_local.h
 create mode 100644 include/uapi/linux/seg6_local.h
 create mode 100644 net/ipv6/seg6_local.c

(limited to 'include/net')

diff --git a/include/linux/seg6_local.h b/include/linux/seg6_local.h
new file mode 100644
index 000000000000..ee63e76fe0c7
--- /dev/null
+++ b/include/linux/seg6_local.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_LOCAL_H
+#define _LINUX_SEG6_LOCAL_H
+
+#include <uapi/linux/seg6_local.h>
+
+#endif
diff --git a/include/net/seg6.h b/include/net/seg6.h
index a32abb040e1d..5379f550f521 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -56,6 +56,8 @@ extern int seg6_init(void);
 extern void seg6_exit(void);
 extern int seg6_iptunnel_init(void);
 extern void seg6_iptunnel_exit(void);
+extern int seg6_local_init(void);
+extern void seg6_local_exit(void);
 
 extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
 extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 92724cba1eba..7fdd19ca7511 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -11,6 +11,7 @@ enum lwtunnel_encap_types {
 	LWTUNNEL_ENCAP_IP6,
 	LWTUNNEL_ENCAP_SEG6,
 	LWTUNNEL_ENCAP_BPF,
+	LWTUNNEL_ENCAP_SEG6_LOCAL,
 	__LWTUNNEL_ENCAP_MAX,
 };
 
diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
new file mode 100644
index 000000000000..ef2d8c3e76c1
--- /dev/null
+++ b/include/uapi/linux/seg6_local.h
@@ -0,0 +1,68 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_LOCAL_H
+#define _UAPI_LINUX_SEG6_LOCAL_H
+
+#include <linux/seg6.h>
+
+enum {
+	SEG6_LOCAL_UNSPEC,
+	SEG6_LOCAL_ACTION,
+	SEG6_LOCAL_SRH,
+	SEG6_LOCAL_TABLE,
+	SEG6_LOCAL_NH4,
+	SEG6_LOCAL_NH6,
+	SEG6_LOCAL_IIF,
+	SEG6_LOCAL_OIF,
+	__SEG6_LOCAL_MAX,
+};
+#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
+
+enum {
+	SEG6_LOCAL_ACTION_UNSPEC	= 0,
+	/* node segment */
+	SEG6_LOCAL_ACTION_END		= 1,
+	/* adjacency segment (IPv6 cross-connect) */
+	SEG6_LOCAL_ACTION_END_X		= 2,
+	/* lookup of next seg NH in table */
+	SEG6_LOCAL_ACTION_END_T		= 3,
+	/* decap and L2 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX2	= 4,
+	/* decap and IPv6 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX6	= 5,
+	/* decap and IPv4 cross-connect */
+	SEG6_LOCAL_ACTION_END_DX4	= 6,
+	/* decap and lookup of DA in v6 table */
+	SEG6_LOCAL_ACTION_END_DT6	= 7,
+	/* decap and lookup of DA in v4 table */
+	SEG6_LOCAL_ACTION_END_DT4	= 8,
+	/* binding segment with insertion */
+	SEG6_LOCAL_ACTION_END_B6	= 9,
+	/* binding segment with encapsulation */
+	SEG6_LOCAL_ACTION_END_B6_ENCAP	= 10,
+	/* binding segment with MPLS encap */
+	SEG6_LOCAL_ACTION_END_BM	= 11,
+	/* lookup last seg in table */
+	SEG6_LOCAL_ACTION_END_S		= 12,
+	/* forward to SR-unaware VNF with static proxy */
+	SEG6_LOCAL_ACTION_END_AS	= 13,
+	/* forward to SR-unaware VNF with masquerading */
+	SEG6_LOCAL_ACTION_END_AM	= 14,
+
+	__SEG6_LOCAL_ACTION_MAX,
+};
+
+#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
+
+#endif
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 435f35f9a61c..0b171756453c 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -44,6 +44,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
 		return "SEG6";
 	case LWTUNNEL_ENCAP_BPF:
 		return "BPF";
+	case LWTUNNEL_ENCAP_SEG6_LOCAL:
+		return "SEG6LOCAL";
 	case LWTUNNEL_ENCAP_IP6:
 	case LWTUNNEL_ENCAP_IP:
 	case LWTUNNEL_ENCAP_NONE:
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 50181a96923e..0d722396dce6 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -311,7 +311,8 @@ config IPV6_SEG6_LWTUNNEL
 	---help---
 	  Support for encapsulation of packets within an outer IPv6
 	  header and a Segment Routing Header using the lightweight
-	  tunnels mechanism.
+	  tunnels mechanism. Also enable support for advanced local
+	  processing of SRv6 packets based on their active segment.
 
 	  If unsure, say N.
 
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index f8b24c2e0d77..10e342363793 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -23,7 +23,7 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
 ipv6-$(CONFIG_PROC_FS) += proc.o
 ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
 ipv6-$(CONFIG_NETLABEL) += calipso.o
-ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o
+ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
 ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
 
 ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 81c2339b3285..c81407770956 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -456,6 +456,10 @@ int __init seg6_init(void)
 	err = seg6_iptunnel_init();
 	if (err)
 		goto out_unregister_pernet;
+
+	err = seg6_local_init();
+	if (err)
+		goto out_unregister_pernet;
 #endif
 
 #ifdef CONFIG_IPV6_SEG6_HMAC
@@ -471,6 +475,7 @@ out:
 #ifdef CONFIG_IPV6_SEG6_HMAC
 out_unregister_iptun:
 #ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+	seg6_local_exit();
 	seg6_iptunnel_exit();
 #endif
 #endif
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
new file mode 100644
index 000000000000..53615d7e0723
--- /dev/null
+++ b/net/ipv6/seg6_local.c
@@ -0,0 +1,320 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *        modify it under the terms of the GNU General Public License
+ *        as published by the Free Software Foundation; either version
+ *        2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/lwtunnel.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include <net/ip6_fib.h>
+#include <net/route.h>
+#include <net/seg6.h>
+#include <linux/seg6.h>
+#include <linux/seg6_local.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/dst_cache.h>
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
+
+struct seg6_local_lwt;
+
+struct seg6_action_desc {
+	int action;
+	unsigned long attrs;
+	int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
+	int static_headroom;
+};
+
+struct seg6_local_lwt {
+	int action;
+	struct ipv6_sr_hdr *srh;
+	int table;
+	struct in_addr nh4;
+	struct in6_addr nh6;
+	int iif;
+	int oif;
+
+	int headroom;
+	struct seg6_action_desc *desc;
+};
+
+static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
+{
+	return (struct seg6_local_lwt *)lwt->data;
+}
+
+static struct seg6_action_desc seg6_action_table[] = {
+	{
+		.action		= SEG6_LOCAL_ACTION_END,
+		.attrs		= 0,
+	},
+};
+
+static struct seg6_action_desc *__get_action_desc(int action)
+{
+	struct seg6_action_desc *desc;
+	int i, count;
+
+	count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
+	for (i = 0; i < count; i++) {
+		desc = &seg6_action_table[i];
+		if (desc->action == action)
+			return desc;
+	}
+
+	return NULL;
+}
+
+static int seg6_local_input(struct sk_buff *skb)
+{
+	struct dst_entry *orig_dst = skb_dst(skb);
+	struct seg6_action_desc *desc;
+	struct seg6_local_lwt *slwt;
+
+	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+	desc = slwt->desc;
+
+	return desc->input(skb, slwt);
+}
+
+static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
+	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },
+	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY },
+	[SEG6_LOCAL_TABLE]	= { .type = NLA_U32 },
+	[SEG6_LOCAL_NH4]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct in_addr) },
+	[SEG6_LOCAL_NH6]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct in6_addr) },
+	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
+	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
+};
+
+struct seg6_action_param {
+	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
+	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
+	int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
+};
+
+static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
+	[SEG6_LOCAL_SRH]	= { .parse = NULL,
+				    .put = NULL,
+				    .cmp = NULL },
+
+	[SEG6_LOCAL_TABLE]	= { .parse = NULL,
+				    .put = NULL,
+				    .cmp = NULL },
+
+	[SEG6_LOCAL_NH4]	= { .parse = NULL,
+				    .put = NULL,
+				    .cmp = NULL },
+
+	[SEG6_LOCAL_NH6]	= { .parse = NULL,
+				    .put = NULL,
+				    .cmp = NULL },
+
+	[SEG6_LOCAL_IIF]	= { .parse = NULL,
+				    .put = NULL,
+				    .cmp = NULL },
+
+	[SEG6_LOCAL_OIF]	= { .parse = NULL,
+				    .put = NULL,
+				    .cmp = NULL },
+};
+
+static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+	struct seg6_action_param *param;
+	struct seg6_action_desc *desc;
+	int i, err;
+
+	desc = __get_action_desc(slwt->action);
+	if (!desc)
+		return -EINVAL;
+
+	if (!desc->input)
+		return -EOPNOTSUPP;
+
+	slwt->desc = desc;
+	slwt->headroom += desc->static_headroom;
+
+	for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+		if (desc->attrs & (1 << i)) {
+			if (!attrs[i])
+				return -EINVAL;
+
+			param = &seg6_action_params[i];
+
+			err = param->parse(attrs, slwt);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
+				  const void *cfg, struct lwtunnel_state **ts,
+				  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[SEG6_LOCAL_MAX + 1];
+	struct lwtunnel_state *newts;
+	struct seg6_local_lwt *slwt;
+	int err;
+
+	err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
+			       extack);
+
+	if (err < 0)
+		return err;
+
+	if (!tb[SEG6_LOCAL_ACTION])
+		return -EINVAL;
+
+	newts = lwtunnel_state_alloc(sizeof(*slwt));
+	if (!newts)
+		return -ENOMEM;
+
+	slwt = seg6_local_lwtunnel(newts);
+	slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
+
+	err = parse_nla_action(tb, slwt);
+	if (err < 0)
+		goto out_free;
+
+	newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
+	newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
+	newts->headroom = slwt->headroom;
+
+	*ts = newts;
+
+	return 0;
+
+out_free:
+	kfree(slwt->srh);
+	kfree(newts);
+	return err;
+}
+
+static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
+{
+	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+
+	kfree(slwt->srh);
+}
+
+static int seg6_local_fill_encap(struct sk_buff *skb,
+				 struct lwtunnel_state *lwt)
+{
+	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+	struct seg6_action_param *param;
+	int i, err;
+
+	if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
+		return -EMSGSIZE;
+
+	for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+		if (slwt->desc->attrs & (1 << i)) {
+			param = &seg6_action_params[i];
+			err = param->put(skb, slwt);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
+{
+	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+	unsigned long attrs;
+	int nlsize;
+
+	nlsize = nla_total_size(4); /* action */
+
+	attrs = slwt->desc->attrs;
+
+	if (attrs & (1 << SEG6_LOCAL_SRH))
+		nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
+
+	if (attrs & (1 << SEG6_LOCAL_TABLE))
+		nlsize += nla_total_size(4);
+
+	if (attrs & (1 << SEG6_LOCAL_NH4))
+		nlsize += nla_total_size(4);
+
+	if (attrs & (1 << SEG6_LOCAL_NH6))
+		nlsize += nla_total_size(16);
+
+	if (attrs & (1 << SEG6_LOCAL_IIF))
+		nlsize += nla_total_size(4);
+
+	if (attrs & (1 << SEG6_LOCAL_OIF))
+		nlsize += nla_total_size(4);
+
+	return nlsize;
+}
+
+static int seg6_local_cmp_encap(struct lwtunnel_state *a,
+				struct lwtunnel_state *b)
+{
+	struct seg6_local_lwt *slwt_a, *slwt_b;
+	struct seg6_action_param *param;
+	int i;
+
+	slwt_a = seg6_local_lwtunnel(a);
+	slwt_b = seg6_local_lwtunnel(b);
+
+	if (slwt_a->action != slwt_b->action)
+		return 1;
+
+	if (slwt_a->desc->attrs != slwt_b->desc->attrs)
+		return 1;
+
+	for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+		if (slwt_a->desc->attrs & (1 << i)) {
+			param = &seg6_action_params[i];
+			if (param->cmp(slwt_a, slwt_b))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+static const struct lwtunnel_encap_ops seg6_local_ops = {
+	.build_state	= seg6_local_build_state,
+	.destroy_state	= seg6_local_destroy_state,
+	.input		= seg6_local_input,
+	.fill_encap	= seg6_local_fill_encap,
+	.get_encap_size	= seg6_local_get_encap_size,
+	.cmp_encap	= seg6_local_cmp_encap,
+	.owner		= THIS_MODULE,
+};
+
+int __init seg6_local_init(void)
+{
+	return lwtunnel_encap_add_ops(&seg6_local_ops,
+				      LWTUNNEL_ENCAP_SEG6_LOCAL);
+}
+
+void seg6_local_exit(void)
+{
+	lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
+}
-- 
cgit v1.2.3


From 6c2c1dcb185f1e44e1c895781dbaba40195234f9 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Sun, 6 Aug 2017 16:15:39 +0300
Subject: net: dsa: Change DSA slave FDB API to be switchdev independent

In order to support FDB add/del to be on a notifier chain the slave
API need to be changed to be switchdev independent.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c       | 12 +++++-------
 drivers/net/dsa/b53/b53_priv.h         |  8 +++-----
 drivers/net/dsa/microchip/ksz_common.c | 34 ++++++++++++++++------------------
 drivers/net/dsa/mt7530.c               | 14 ++++++--------
 drivers/net/dsa/mv88e6xxx/chip.c       | 12 +++++-------
 drivers/net/dsa/qca8k.c                | 15 ++++++---------
 include/net/dsa.h                      |  8 +++-----
 net/dsa/switch.c                       |  8 +++++---
 8 files changed, 49 insertions(+), 62 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 7f36d3e3c98b..53361796607a 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1214,8 +1214,7 @@ static int b53_arl_op(struct b53_device *dev, int op, int port,
 }
 
 int b53_fdb_prepare(struct dsa_switch *ds, int port,
-		    const struct switchdev_obj_port_fdb *fdb,
-		    struct switchdev_trans *trans)
+		    const unsigned char *addr, u16 vid)
 {
 	struct b53_device *priv = ds->priv;
 
@@ -1230,22 +1229,21 @@ int b53_fdb_prepare(struct dsa_switch *ds, int port,
 EXPORT_SYMBOL(b53_fdb_prepare);
 
 void b53_fdb_add(struct dsa_switch *ds, int port,
-		 const struct switchdev_obj_port_fdb *fdb,
-		 struct switchdev_trans *trans)
+		 const unsigned char *addr, u16 vid)
 {
 	struct b53_device *priv = ds->priv;
 
-	if (b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, true))
+	if (b53_arl_op(priv, 0, port, addr, vid, true))
 		pr_err("%s: failed to add MAC address\n", __func__);
 }
 EXPORT_SYMBOL(b53_fdb_add);
 
 int b53_fdb_del(struct dsa_switch *ds, int port,
-		const struct switchdev_obj_port_fdb *fdb)
+		const unsigned char *addr, u16 vid)
 {
 	struct b53_device *priv = ds->priv;
 
-	return b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, false);
+	return b53_arl_op(priv, 0, port, addr, vid, false);
 }
 EXPORT_SYMBOL(b53_fdb_del);
 
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 155a9c48c317..d417bcaec71d 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -397,13 +397,11 @@ int b53_vlan_dump(struct dsa_switch *ds, int port,
 		  struct switchdev_obj_port_vlan *vlan,
 		  switchdev_obj_dump_cb_t *cb);
 int b53_fdb_prepare(struct dsa_switch *ds, int port,
-		    const struct switchdev_obj_port_fdb *fdb,
-		    struct switchdev_trans *trans);
+		    const unsigned char *addr, u16 vid);
 void b53_fdb_add(struct dsa_switch *ds, int port,
-		 const struct switchdev_obj_port_fdb *fdb,
-		 struct switchdev_trans *trans);
+		 const unsigned char *addr, u16 vid);
 int b53_fdb_del(struct dsa_switch *ds, int port,
-		const struct switchdev_obj_port_fdb *fdb);
+		const unsigned char *addr, u16 vid);
 int b53_fdb_dump(struct dsa_switch *ds, int port,
 		 struct switchdev_obj_port_fdb *fdb,
 		 switchdev_obj_dump_cb_t *cb);
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index b313ecdf2919..db828080ee93 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -679,8 +679,7 @@ static int ksz_port_vlan_dump(struct dsa_switch *ds, int port,
 }
 
 static int ksz_port_fdb_prepare(struct dsa_switch *ds, int port,
-				const struct switchdev_obj_port_fdb *fdb,
-				struct switchdev_trans *trans)
+				const unsigned char *addr, u16 vid)
 {
 	/* nothing needed */
 
@@ -707,8 +706,7 @@ struct alu_struct {
 };
 
 static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
-			     const struct switchdev_obj_port_fdb *fdb,
-			     struct switchdev_trans *trans)
+			     const unsigned char *addr, u16 vid)
 {
 	struct ksz_device *dev = ds->priv;
 	u32 alu_table[4];
@@ -717,12 +715,12 @@ static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
 	mutex_lock(&dev->alu_mutex);
 
 	/* find any entry with mac & vid */
-	data = fdb->vid << ALU_FID_INDEX_S;
-	data |= ((fdb->addr[0] << 8) | fdb->addr[1]);
+	data = vid << ALU_FID_INDEX_S;
+	data |= ((addr[0] << 8) | addr[1]);
 	ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
 
-	data = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
-	data |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+	data = ((addr[2] << 24) | (addr[3] << 16));
+	data |= ((addr[4] << 8) | addr[5]);
 	ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
 
 	/* start read operation */
@@ -740,12 +738,12 @@ static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
 	/* update ALU entry */
 	alu_table[0] = ALU_V_STATIC_VALID;
 	alu_table[1] |= BIT(port);
-	if (fdb->vid)
+	if (vid)
 		alu_table[1] |= ALU_V_USE_FID;
-	alu_table[2] = (fdb->vid << ALU_V_FID_S);
-	alu_table[2] |= ((fdb->addr[0] << 8) | fdb->addr[1]);
-	alu_table[3] = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
-	alu_table[3] |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+	alu_table[2] = (vid << ALU_V_FID_S);
+	alu_table[2] |= ((addr[0] << 8) | addr[1]);
+	alu_table[3] = ((addr[2] << 24) | (addr[3] << 16));
+	alu_table[3] |= ((addr[4] << 8) | addr[5]);
 
 	write_table(ds, alu_table);
 
@@ -760,7 +758,7 @@ exit:
 }
 
 static int ksz_port_fdb_del(struct dsa_switch *ds, int port,
-			    const struct switchdev_obj_port_fdb *fdb)
+			    const unsigned char *addr, u16 vid)
 {
 	struct ksz_device *dev = ds->priv;
 	u32 alu_table[4];
@@ -770,12 +768,12 @@ static int ksz_port_fdb_del(struct dsa_switch *ds, int port,
 	mutex_lock(&dev->alu_mutex);
 
 	/* read any entry with mac & vid */
-	data = fdb->vid << ALU_FID_INDEX_S;
-	data |= ((fdb->addr[0] << 8) | fdb->addr[1]);
+	data = vid << ALU_FID_INDEX_S;
+	data |= ((addr[0] << 8) | addr[1]);
 	ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
 
-	data = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
-	data |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+	data = ((addr[2] << 24) | (addr[3] << 16));
+	data |= ((addr[4] << 8) | addr[5]);
 	ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
 
 	/* start read operation */
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 1e46418a3b74..430e3ab65a49 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -802,8 +802,7 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
 
 static int
 mt7530_port_fdb_prepare(struct dsa_switch *ds, int port,
-			const struct switchdev_obj_port_fdb *fdb,
-			struct switchdev_trans *trans)
+			const unsigned char *addr, u16 vid)
 {
 	struct mt7530_priv *priv = ds->priv;
 	int ret;
@@ -813,7 +812,7 @@ mt7530_port_fdb_prepare(struct dsa_switch *ds, int port,
 	 * is called while the entry is still available.
 	 */
 	mutex_lock(&priv->reg_mutex);
-	mt7530_fdb_write(priv, fdb->vid, 0, fdb->addr, -1, STATIC_ENT);
+	mt7530_fdb_write(priv, vid, 0, addr, -1, STATIC_ENT);
 	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
 	mutex_unlock(&priv->reg_mutex);
 
@@ -822,28 +821,27 @@ mt7530_port_fdb_prepare(struct dsa_switch *ds, int port,
 
 static void
 mt7530_port_fdb_add(struct dsa_switch *ds, int port,
-		    const struct switchdev_obj_port_fdb *fdb,
-		    struct switchdev_trans *trans)
+		    const unsigned char *addr, u16 vid)
 {
 	struct mt7530_priv *priv = ds->priv;
 	u8 port_mask = BIT(port);
 
 	mutex_lock(&priv->reg_mutex);
-	mt7530_fdb_write(priv, fdb->vid, port_mask, fdb->addr, -1, STATIC_ENT);
+	mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT);
 	mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
 	mutex_unlock(&priv->reg_mutex);
 }
 
 static int
 mt7530_port_fdb_del(struct dsa_switch *ds, int port,
-		    const struct switchdev_obj_port_fdb *fdb)
+		    const unsigned char *addr, u16 vid)
 {
 	struct mt7530_priv *priv = ds->priv;
 	int ret;
 	u8 port_mask = BIT(port);
 
 	mutex_lock(&priv->reg_mutex);
-	mt7530_fdb_write(priv, fdb->vid, port_mask, fdb->addr, -1, STATIC_EMP);
+	mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_EMP);
 	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
 	mutex_unlock(&priv->reg_mutex);
 
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 521738c4cd17..823697526db4 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1407,8 +1407,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
 }
 
 static int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port,
-				      const struct switchdev_obj_port_fdb *fdb,
-				      struct switchdev_trans *trans)
+				      const unsigned char *addr, u16 vid)
 {
 	/* We don't need any dynamic resource from the kernel (yet),
 	 * so skip the prepare phase.
@@ -1417,13 +1416,12 @@ static int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port,
 }
 
 static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
-				   const struct switchdev_obj_port_fdb *fdb,
-				   struct switchdev_trans *trans)
+				   const unsigned char *addr, u16 vid)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 
 	mutex_lock(&chip->reg_lock);
-	if (mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid,
+	if (mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
 					 MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC))
 		dev_err(ds->dev, "p%d: failed to load unicast MAC address\n",
 			port);
@@ -1431,13 +1429,13 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
 }
 
 static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
-				  const struct switchdev_obj_port_fdb *fdb)
+				  const unsigned char *addr, u16 vid)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
 	mutex_lock(&chip->reg_lock);
-	err = mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid,
+	err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
 					   MV88E6XXX_G1_ATU_DATA_STATE_UNUSED);
 	mutex_unlock(&chip->reg_lock);
 
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 36c169b0c705..2fb5df9dbd64 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -778,8 +778,7 @@ qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr,
 
 static int
 qca8k_port_fdb_prepare(struct dsa_switch *ds, int port,
-		       const struct switchdev_obj_port_fdb *fdb,
-		       struct switchdev_trans *trans)
+		       const unsigned char *addr, u16 vid)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 
@@ -788,33 +787,31 @@ qca8k_port_fdb_prepare(struct dsa_switch *ds, int port,
 	 * when port_fdb_add is called an entry is still available. Otherwise
 	 * the last free entry might have been used up by auto learning
 	 */
-	return qca8k_port_fdb_insert(priv, fdb->addr, 0, fdb->vid);
+	return qca8k_port_fdb_insert(priv, addr, 0, vid);
 }
 
 static void
 qca8k_port_fdb_add(struct dsa_switch *ds, int port,
-		   const struct switchdev_obj_port_fdb *fdb,
-		   struct switchdev_trans *trans)
+		   const unsigned char *addr, u16 vid)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	u16 port_mask = BIT(port);
 
 	/* Update the FDB entry adding the port_mask */
-	qca8k_port_fdb_insert(priv, fdb->addr, port_mask, fdb->vid);
+	qca8k_port_fdb_insert(priv, addr, port_mask, vid);
 }
 
 static int
 qca8k_port_fdb_del(struct dsa_switch *ds, int port,
-		   const struct switchdev_obj_port_fdb *fdb)
+		   const unsigned char *addr, u16 vid)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	u16 port_mask = BIT(port);
-	u16 vid = fdb->vid;
 
 	if (!vid)
 		vid = 1;
 
-	return qca8k_fdb_del(priv, fdb->addr, port_mask, vid);
+	return qca8k_fdb_del(priv, addr, port_mask, vid);
 }
 
 static int
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 0b1a0622b33c..ba11005dac8c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -386,13 +386,11 @@ struct dsa_switch_ops {
 	 * Forwarding database
 	 */
 	int	(*port_fdb_prepare)(struct dsa_switch *ds, int port,
-				    const struct switchdev_obj_port_fdb *fdb,
-				    struct switchdev_trans *trans);
+				    const unsigned char *addr, u16 vid);
 	void	(*port_fdb_add)(struct dsa_switch *ds, int port,
-				const struct switchdev_obj_port_fdb *fdb,
-				struct switchdev_trans *trans);
+				const unsigned char *addr, u16 vid);
 	int	(*port_fdb_del)(struct dsa_switch *ds, int port,
-				const struct switchdev_obj_port_fdb *fdb);
+				const unsigned char *addr, u16 vid);
 	int	(*port_fdb_dump)(struct dsa_switch *ds, int port,
 				 struct switchdev_obj_port_fdb *fdb,
 				  switchdev_obj_dump_cb_t *cb);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 97e2e9c8cf3f..a9edfbad3889 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -94,10 +94,11 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds,
 		if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
 			return -EOPNOTSUPP;
 
-		return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans);
+		return ds->ops->port_fdb_prepare(ds, info->port, fdb->addr,
+						 fdb->vid);
 	}
 
-	ds->ops->port_fdb_add(ds, info->port, fdb, trans);
+	ds->ops->port_fdb_add(ds, info->port, fdb->addr, fdb->vid);
 
 	return 0;
 }
@@ -114,7 +115,8 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
 	if (!ds->ops->port_fdb_del)
 		return -EOPNOTSUPP;
 
-	return ds->ops->port_fdb_del(ds, info->port, fdb);
+	return ds->ops->port_fdb_del(ds, info->port, fdb->addr,
+				     fdb->vid);
 }
 
 static int dsa_switch_mdb_add(struct dsa_switch *ds,
-- 
cgit v1.2.3


From 1b6dd556c3045ca5fa31cc1e98a4a43afa680e1e Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Sun, 6 Aug 2017 16:15:40 +0300
Subject: net: dsa: Remove prepare phase for FDB

The prepare phase for FDB add is unneeded because most of DSA devices
can have failures during bus transactions (SPI, I2C, etc.), thus, the
prepare phase cannot guarantee success of the commit stage.

The support for learning FDB through notification chain, which will be
introduced in the following patches, will provide the ability to notify
back the bridge about successful offload.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c       | 17 +++--------------
 drivers/net/dsa/b53/b53_priv.h         |  6 ++----
 drivers/net/dsa/bcm_sf2.c              |  1 -
 drivers/net/dsa/microchip/ksz_common.c | 24 ++++++++++--------------
 drivers/net/dsa/mt7530.c               | 25 ++++---------------------
 drivers/net/dsa/mv88e6xxx/chip.c       | 23 +++++++----------------
 drivers/net/dsa/qca8k.c                | 18 +-----------------
 include/net/dsa.h                      |  4 +---
 net/dsa/dsa_priv.h                     |  4 +---
 net/dsa/port.c                         |  4 +---
 net/dsa/slave.c                        |  4 +++-
 net/dsa/switch.c                       | 14 +++-----------
 12 files changed, 36 insertions(+), 108 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 53361796607a..3cf4f0a04453 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1213,8 +1213,8 @@ static int b53_arl_op(struct b53_device *dev, int op, int port,
 	return b53_arl_rw_op(dev, 0);
 }
 
-int b53_fdb_prepare(struct dsa_switch *ds, int port,
-		    const unsigned char *addr, u16 vid)
+int b53_fdb_add(struct dsa_switch *ds, int port,
+		const unsigned char *addr, u16 vid)
 {
 	struct b53_device *priv = ds->priv;
 
@@ -1224,17 +1224,7 @@ int b53_fdb_prepare(struct dsa_switch *ds, int port,
 	if (is5325(priv) || is5365(priv))
 		return -EOPNOTSUPP;
 
-	return 0;
-}
-EXPORT_SYMBOL(b53_fdb_prepare);
-
-void b53_fdb_add(struct dsa_switch *ds, int port,
-		 const unsigned char *addr, u16 vid)
-{
-	struct b53_device *priv = ds->priv;
-
-	if (b53_arl_op(priv, 0, port, addr, vid, true))
-		pr_err("%s: failed to add MAC address\n", __func__);
+	return b53_arl_op(priv, 0, port, addr, vid, true);
 }
 EXPORT_SYMBOL(b53_fdb_add);
 
@@ -1563,7 +1553,6 @@ static const struct dsa_switch_ops b53_switch_ops = {
 	.port_vlan_add		= b53_vlan_add,
 	.port_vlan_del		= b53_vlan_del,
 	.port_vlan_dump		= b53_vlan_dump,
-	.port_fdb_prepare	= b53_fdb_prepare,
 	.port_fdb_dump		= b53_fdb_dump,
 	.port_fdb_add		= b53_fdb_add,
 	.port_fdb_del		= b53_fdb_del,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index d417bcaec71d..f29c892efa6b 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -396,10 +396,8 @@ int b53_vlan_del(struct dsa_switch *ds, int port,
 int b53_vlan_dump(struct dsa_switch *ds, int port,
 		  struct switchdev_obj_port_vlan *vlan,
 		  switchdev_obj_dump_cb_t *cb);
-int b53_fdb_prepare(struct dsa_switch *ds, int port,
-		    const unsigned char *addr, u16 vid);
-void b53_fdb_add(struct dsa_switch *ds, int port,
-		 const unsigned char *addr, u16 vid);
+int b53_fdb_add(struct dsa_switch *ds, int port,
+		const unsigned char *addr, u16 vid);
 int b53_fdb_del(struct dsa_switch *ds, int port,
 		const unsigned char *addr, u16 vid);
 int b53_fdb_dump(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 558667c814c9..1907b27297c3 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1022,7 +1022,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
 	.port_vlan_add		= b53_vlan_add,
 	.port_vlan_del		= b53_vlan_del,
 	.port_vlan_dump		= b53_vlan_dump,
-	.port_fdb_prepare	= b53_fdb_prepare,
 	.port_fdb_dump		= b53_fdb_dump,
 	.port_fdb_add		= b53_fdb_add,
 	.port_fdb_del		= b53_fdb_del,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index db828080ee93..b55f3649ff93 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -678,14 +678,6 @@ static int ksz_port_vlan_dump(struct dsa_switch *ds, int port,
 	return err;
 }
 
-static int ksz_port_fdb_prepare(struct dsa_switch *ds, int port,
-				const unsigned char *addr, u16 vid)
-{
-	/* nothing needed */
-
-	return 0;
-}
-
 struct alu_struct {
 	/* entry 1 */
 	u8	is_static:1;
@@ -705,12 +697,13 @@ struct alu_struct {
 	u8	mac[ETH_ALEN];
 };
 
-static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
-			     const unsigned char *addr, u16 vid)
+static int ksz_port_fdb_add(struct dsa_switch *ds, int port,
+			    const unsigned char *addr, u16 vid)
 {
 	struct ksz_device *dev = ds->priv;
 	u32 alu_table[4];
 	u32 data;
+	int ret = 0;
 
 	mutex_lock(&dev->alu_mutex);
 
@@ -727,7 +720,8 @@ static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
 	ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
 
 	/* wait to be finished */
-	if (wait_alu_ready(dev, ALU_START, 1000) < 0) {
+	ret = wait_alu_ready(dev, ALU_START, 1000);
+	if (ret < 0) {
 		dev_dbg(dev->dev, "Failed to read ALU\n");
 		goto exit;
 	}
@@ -750,11 +744,14 @@ static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
 	ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
 
 	/* wait to be finished */
-	if (wait_alu_ready(dev, ALU_START, 1000) < 0)
-		dev_dbg(dev->dev, "Failed to read ALU\n");
+	ret = wait_alu_ready(dev, ALU_START, 1000);
+	if (ret < 0)
+		dev_dbg(dev->dev, "Failed to write ALU\n");
 
 exit:
 	mutex_unlock(&dev->alu_mutex);
+
+	return ret;
 }
 
 static int ksz_port_fdb_del(struct dsa_switch *ds, int port,
@@ -1128,7 +1125,6 @@ static const struct dsa_switch_ops ksz_switch_ops = {
 	.port_vlan_add		= ksz_port_vlan_add,
 	.port_vlan_del		= ksz_port_vlan_del,
 	.port_vlan_dump		= ksz_port_vlan_dump,
-	.port_fdb_prepare	= ksz_port_fdb_prepare,
 	.port_fdb_dump		= ksz_port_fdb_dump,
 	.port_fdb_add		= ksz_port_fdb_add,
 	.port_fdb_del		= ksz_port_fdb_del,
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 430e3ab65a49..f92aae8947e6 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -801,35 +801,19 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
 }
 
 static int
-mt7530_port_fdb_prepare(struct dsa_switch *ds, int port,
-			const unsigned char *addr, u16 vid)
-{
-	struct mt7530_priv *priv = ds->priv;
-	int ret;
-
-	/* Because auto-learned entrie shares the same FDB table.
-	 * an entry is reserved with no port_mask to make sure fdb_add
-	 * is called while the entry is still available.
-	 */
-	mutex_lock(&priv->reg_mutex);
-	mt7530_fdb_write(priv, vid, 0, addr, -1, STATIC_ENT);
-	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
-	mutex_unlock(&priv->reg_mutex);
-
-	return ret;
-}
-
-static void
 mt7530_port_fdb_add(struct dsa_switch *ds, int port,
 		    const unsigned char *addr, u16 vid)
 {
 	struct mt7530_priv *priv = ds->priv;
+	int ret;
 	u8 port_mask = BIT(port);
 
 	mutex_lock(&priv->reg_mutex);
 	mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT);
-	mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
+	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
 	mutex_unlock(&priv->reg_mutex);
+
+	return ret;
 }
 
 static int
@@ -1013,7 +997,6 @@ static struct dsa_switch_ops mt7530_switch_ops = {
 	.port_stp_state_set	= mt7530_stp_state_set,
 	.port_bridge_join	= mt7530_port_bridge_join,
 	.port_bridge_leave	= mt7530_port_bridge_leave,
-	.port_fdb_prepare	= mt7530_port_fdb_prepare,
 	.port_fdb_add		= mt7530_port_fdb_add,
 	.port_fdb_del		= mt7530_port_fdb_del,
 	.port_fdb_dump		= mt7530_port_fdb_dump,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 823697526db4..68cc1f6fb75e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1406,26 +1406,18 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
 	return mv88e6xxx_g1_atu_loadpurge(chip, vlan.fid, &entry);
 }
 
-static int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port,
-				      const unsigned char *addr, u16 vid)
-{
-	/* We don't need any dynamic resource from the kernel (yet),
-	 * so skip the prepare phase.
-	 */
-	return 0;
-}
-
-static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
-				   const unsigned char *addr, u16 vid)
+static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
+				  const unsigned char *addr, u16 vid)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
+	int err;
 
 	mutex_lock(&chip->reg_lock);
-	if (mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
-					 MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC))
-		dev_err(ds->dev, "p%d: failed to load unicast MAC address\n",
-			port);
+	err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
+					   MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
 	mutex_unlock(&chip->reg_lock);
+
+	return err;
 }
 
 static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
@@ -3905,7 +3897,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
 	.port_vlan_add		= mv88e6xxx_port_vlan_add,
 	.port_vlan_del		= mv88e6xxx_port_vlan_del,
 	.port_vlan_dump		= mv88e6xxx_port_vlan_dump,
-	.port_fdb_prepare       = mv88e6xxx_port_fdb_prepare,
 	.port_fdb_add           = mv88e6xxx_port_fdb_add,
 	.port_fdb_del           = mv88e6xxx_port_fdb_del,
 	.port_fdb_dump          = mv88e6xxx_port_fdb_dump,
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 2fb5df9dbd64..f8ef823349bc 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -777,28 +777,13 @@ qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr,
 }
 
 static int
-qca8k_port_fdb_prepare(struct dsa_switch *ds, int port,
-		       const unsigned char *addr, u16 vid)
-{
-	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
-
-	/* The FDB table for static and auto learned entries is the same. We
-	 * need to reserve an entry with no port_mask set to make sure that
-	 * when port_fdb_add is called an entry is still available. Otherwise
-	 * the last free entry might have been used up by auto learning
-	 */
-	return qca8k_port_fdb_insert(priv, addr, 0, vid);
-}
-
-static void
 qca8k_port_fdb_add(struct dsa_switch *ds, int port,
 		   const unsigned char *addr, u16 vid)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	u16 port_mask = BIT(port);
 
-	/* Update the FDB entry adding the port_mask */
-	qca8k_port_fdb_insert(priv, addr, port_mask, vid);
+	return qca8k_port_fdb_insert(priv, addr, port_mask, vid);
 }
 
 static int
@@ -866,7 +851,6 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
 	.port_stp_state_set	= qca8k_port_stp_state_set,
 	.port_bridge_join	= qca8k_port_bridge_join,
 	.port_bridge_leave	= qca8k_port_bridge_leave,
-	.port_fdb_prepare	= qca8k_port_fdb_prepare,
 	.port_fdb_add		= qca8k_port_fdb_add,
 	.port_fdb_del		= qca8k_port_fdb_del,
 	.port_fdb_dump		= qca8k_port_fdb_dump,
diff --git a/include/net/dsa.h b/include/net/dsa.h
index ba11005dac8c..446fc438cb80 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -385,9 +385,7 @@ struct dsa_switch_ops {
 	/*
 	 * Forwarding database
 	 */
-	int	(*port_fdb_prepare)(struct dsa_switch *ds, int port,
-				    const unsigned char *addr, u16 vid);
-	void	(*port_fdb_add)(struct dsa_switch *ds, int port,
+	int	(*port_fdb_add)(struct dsa_switch *ds, int port,
 				const unsigned char *addr, u16 vid);
 	int	(*port_fdb_del)(struct dsa_switch *ds, int port,
 				const unsigned char *addr, u16 vid);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 945ded148c9c..04cd711ed2fd 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -44,7 +44,6 @@ struct dsa_notifier_bridge_info {
 /* DSA_NOTIFIER_FDB_* */
 struct dsa_notifier_fdb_info {
 	const struct switchdev_obj_port_fdb *fdb;
-	struct switchdev_trans *trans;
 	int sw_index;
 	int port;
 };
@@ -122,8 +121,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 			 struct switchdev_trans *trans);
 int dsa_port_fdb_add(struct dsa_port *dp,
-		     const struct switchdev_obj_port_fdb *fdb,
-		     struct switchdev_trans *trans);
+		     const struct switchdev_obj_port_fdb *fdb);
 int dsa_port_fdb_del(struct dsa_port *dp,
 		     const struct switchdev_obj_port_fdb *fdb);
 int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index efc3bce3a89d..bd271b9cc1f2 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -147,13 +147,11 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 }
 
 int dsa_port_fdb_add(struct dsa_port *dp,
-		     const struct switchdev_obj_port_fdb *fdb,
-		     struct switchdev_trans *trans)
+		     const struct switchdev_obj_port_fdb *fdb)
 {
 	struct dsa_notifier_fdb_info info = {
 		.sw_index = dp->ds->index,
 		.port = dp->index,
-		.trans = trans,
 		.fdb = fdb,
 	};
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3b36c47472c6..bb7ab26ef768 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -251,7 +251,9 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans);
+		if (switchdev_trans_ph_prepare(trans))
+			return 0;
+		err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
 		err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index a9edfbad3889..eb20e0fee0e1 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -84,23 +84,15 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds,
 			      struct dsa_notifier_fdb_info *info)
 {
 	const struct switchdev_obj_port_fdb *fdb = info->fdb;
-	struct switchdev_trans *trans = info->trans;
 
 	/* Do not care yet about other switch chips of the fabric */
 	if (ds->index != info->sw_index)
 		return 0;
 
-	if (switchdev_trans_ph_prepare(trans)) {
-		if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
-			return -EOPNOTSUPP;
-
-		return ds->ops->port_fdb_prepare(ds, info->port, fdb->addr,
-						 fdb->vid);
-	}
-
-	ds->ops->port_fdb_add(ds, info->port, fdb->addr, fdb->vid);
+	if (!ds->ops->port_fdb_add)
+		return -EOPNOTSUPP;
 
-	return 0;
+	return ds->ops->port_fdb_add(ds, info->port, fdb->addr, fdb->vid);
 }
 
 static int dsa_switch_fdb_del(struct dsa_switch *ds,
-- 
cgit v1.2.3


From c069fcd82c571953b8aaf68769afe9ccb1aa7a9f Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Sun, 6 Aug 2017 16:15:46 +0300
Subject: net: dsa: Remove support for bypass bridge port attributes/vlan set

The bridge port attributes/vlan for DSA devices should be set only
from bridge code. Furthermore, The vlans are synced totally with the
bridge so there is no need for special dump support.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h  |  4 ----
 net/dsa/dsa_priv.h |  4 ----
 net/dsa/port.c     | 12 ------------
 net/dsa/slave.c    |  6 ------
 4 files changed, 26 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 446fc438cb80..d7b9bdd0e768 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -378,10 +378,6 @@ struct dsa_switch_ops {
 				 struct switchdev_trans *trans);
 	int	(*port_vlan_del)(struct dsa_switch *ds, int port,
 				 const struct switchdev_obj_port_vlan *vlan);
-	int	(*port_vlan_dump)(struct dsa_switch *ds, int port,
-				  struct switchdev_obj_port_vlan *vlan,
-				  switchdev_obj_dump_cb_t *cb);
-
 	/*
 	 * Forwarding database
 	 */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 5d4e668b9471..7f297bcefa0c 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -147,10 +147,6 @@ int dsa_port_vlan_add(struct dsa_port *dp,
 		      struct switchdev_trans *trans);
 int dsa_port_vlan_del(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_vlan_dump(struct dsa_port *dp,
-		       struct switchdev_obj_port_vlan *vlan,
-		       switchdev_obj_dump_cb_t *cb);
-
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 86e0585215bf..ce1921663cdd 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -246,15 +246,3 @@ int dsa_port_vlan_del(struct dsa_port *dp,
 
 	return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
 }
-
-int dsa_port_vlan_dump(struct dsa_port *dp,
-		       struct switchdev_obj_port_vlan *vlan,
-		       switchdev_obj_dump_cb_t *cb)
-{
-	struct dsa_switch *ds = dp->ds;
-
-	if (ds->ops->port_vlan_dump)
-		return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
-
-	return -EOPNOTSUPP;
-}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index e9c1d8c5de0f..ccf670679343 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -302,9 +302,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
 		err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
 		break;
-	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb);
-		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
@@ -988,9 +985,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_netpoll_cleanup	= dsa_slave_netpoll_cleanup,
 	.ndo_poll_controller	= dsa_slave_poll_controller,
 #endif
-	.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
-	.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
-	.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
 	.ndo_get_phys_port_name	= dsa_slave_get_phys_port_name,
 	.ndo_setup_tc		= dsa_slave_setup_tc,
 	.ndo_get_stats64	= dsa_slave_get_stats64,
-- 
cgit v1.2.3


From dc0cbff3ff9fe331160c2be2b3f47564e247137d Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Sun, 6 Aug 2017 16:15:48 +0300
Subject: net: dsa: Remove redundant MDB dump support

Currently the MDB HW database is synced with the bridge's one, thus,
There is no need to support special dump functionality.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h  |  4 ----
 net/dsa/dsa_priv.h |  2 --
 net/dsa/port.c     | 11 -----------
 net/dsa/slave.c    |  3 ---
 4 files changed, 20 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index d7b9bdd0e768..4ef185997b6f 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -400,10 +400,6 @@ struct dsa_switch_ops {
 				struct switchdev_trans *trans);
 	int	(*port_mdb_del)(struct dsa_switch *ds, int port,
 				const struct switchdev_obj_port_mdb *mdb);
-	int	(*port_mdb_dump)(struct dsa_switch *ds, int port,
-				 struct switchdev_obj_port_mdb *mdb,
-				  switchdev_obj_dump_cb_t *cb);
-
 	/*
 	 * RXNFC
 	 */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 7f297bcefa0c..9c890de6e4dd 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -140,8 +140,6 @@ int dsa_port_mdb_add(struct dsa_port *dp,
 		     struct switchdev_trans *trans);
 int dsa_port_mdb_del(struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
-		      switchdev_obj_dump_cb_t *cb);
 int dsa_port_vlan_add(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan,
 		      struct switchdev_trans *trans);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ce1921663cdd..73787828953a 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -210,17 +210,6 @@ int dsa_port_mdb_del(struct dsa_port *dp,
 	return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
 }
 
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
-		      switchdev_obj_dump_cb_t *cb)
-{
-	struct dsa_switch *ds = dp->ds;
-
-	if (ds->ops->port_mdb_dump)
-		return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
-
-	return -EOPNOTSUPP;
-}
-
 int dsa_port_vlan_add(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan,
 		      struct switchdev_trans *trans)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ccf670679343..5807c905fd1d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -299,9 +299,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
 		err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
 		break;
-	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
-		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
-- 
cgit v1.2.3


From 2bedde1abbef5eec211308f0293dd7681b0513ec Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Sun, 6 Aug 2017 16:15:49 +0300
Subject: net: dsa: Move FDB dump implementation inside DSA

>From all switchdev devices only DSA requires special FDB dump. This is due
to lack of ability for syncing the hardware learned FDBs with the bridge.
Due to this it is removed from switchdev and moved inside DSA.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c       |  16 ++----
 drivers/net/dsa/b53/b53_priv.h         |   3 +-
 drivers/net/dsa/microchip/ksz_common.c |  20 ++-----
 drivers/net/dsa/mt7530.c               |  10 +---
 drivers/net/dsa/mv88e6xxx/chip.c       |  38 ++++---------
 drivers/net/dsa/qca8k.c                |  15 ++---
 include/net/dsa.h                      |   5 +-
 include/net/switchdev.h                |  12 ----
 net/dsa/dsa_priv.h                     |   2 -
 net/dsa/port.c                         |  11 ----
 net/dsa/slave.c                        | 100 +++++++++++++++++++++++++--------
 net/switchdev/switchdev.c              |  84 ---------------------------
 12 files changed, 112 insertions(+), 204 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 0176d8087344..274f3679f33d 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1227,8 +1227,7 @@ static void b53_arl_search_rd(struct b53_device *dev, u8 idx,
 }
 
 static int b53_fdb_copy(int port, const struct b53_arl_entry *ent,
-			struct switchdev_obj_port_fdb *fdb,
-			switchdev_obj_dump_cb_t *cb)
+			dsa_fdb_dump_cb_t *cb, void *data)
 {
 	if (!ent->is_valid)
 		return 0;
@@ -1236,16 +1235,11 @@ static int b53_fdb_copy(int port, const struct b53_arl_entry *ent,
 	if (port != ent->port)
 		return 0;
 
-	ether_addr_copy(fdb->addr, ent->mac);
-	fdb->vid = ent->vid;
-	fdb->ndm_state = ent->is_static ? NUD_NOARP : NUD_REACHABLE;
-
-	return cb(&fdb->obj);
+	return cb(ent->mac, ent->vid, ent->is_static, data);
 }
 
 int b53_fdb_dump(struct dsa_switch *ds, int port,
-		 struct switchdev_obj_port_fdb *fdb,
-		 switchdev_obj_dump_cb_t *cb)
+		 dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct b53_device *priv = ds->priv;
 	struct b53_arl_entry results[2];
@@ -1263,13 +1257,13 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
 			return ret;
 
 		b53_arl_search_rd(priv, 0, &results[0]);
-		ret = b53_fdb_copy(port, &results[0], fdb, cb);
+		ret = b53_fdb_copy(port, &results[0], cb, data);
 		if (ret)
 			return ret;
 
 		if (priv->num_arl_entries > 2) {
 			b53_arl_search_rd(priv, 1, &results[1]);
-			ret = b53_fdb_copy(port, &results[1], fdb, cb);
+			ret = b53_fdb_copy(port, &results[1], cb, data);
 			if (ret)
 				return ret;
 
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index af5d6c166bff..01bd8cbe9a3f 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -398,8 +398,7 @@ int b53_fdb_add(struct dsa_switch *ds, int port,
 int b53_fdb_del(struct dsa_switch *ds, int port,
 		const unsigned char *addr, u16 vid);
 int b53_fdb_dump(struct dsa_switch *ds, int port,
-		 struct switchdev_obj_port_fdb *fdb,
-		 switchdev_obj_dump_cb_t *cb);
+		 dsa_fdb_dump_cb_t *cb, void *data);
 int b53_mirror_add(struct dsa_switch *ds, int port,
 		   struct dsa_mall_mirror_tc_entry *mirror, bool ingress);
 void b53_mirror_del(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 4de9d90a4bb3..56cd6d365352 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -805,12 +805,11 @@ static void convert_alu(struct alu_struct *alu, u32 *alu_table)
 }
 
 static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
-			     struct switchdev_obj_port_fdb *fdb,
-			     switchdev_obj_dump_cb_t *cb)
+			     dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct ksz_device *dev = ds->priv;
 	int ret = 0;
-	u32 data;
+	u32 ksz_data;
 	u32 alu_table[4];
 	struct alu_struct alu;
 	int timeout;
@@ -823,8 +822,8 @@ static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
 	do {
 		timeout = 1000;
 		do {
-			ksz_read32(dev, REG_SW_ALU_CTRL__4, &data);
-			if ((data & ALU_VALID) || !(data & ALU_START))
+			ksz_read32(dev, REG_SW_ALU_CTRL__4, &ksz_data);
+			if ((ksz_data & ALU_VALID) || !(ksz_data & ALU_START))
 				break;
 			usleep_range(1, 10);
 		} while (timeout-- > 0);
@@ -841,18 +840,11 @@ static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
 		convert_alu(&alu, alu_table);
 
 		if (alu.port_forward & BIT(port)) {
-			fdb->vid = alu.fid;
-			if (alu.is_static)
-				fdb->ndm_state = NUD_NOARP;
-			else
-				fdb->ndm_state = NUD_REACHABLE;
-			ether_addr_copy(fdb->addr, alu.mac);
-
-			ret = cb(&fdb->obj);
+			ret = cb(alu.mac, alu.fid, alu.is_static, data);
 			if (ret)
 				goto exit;
 		}
-	} while (data & ALU_START);
+	} while (ksz_data & ALU_START);
 
 exit:
 
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index f92aae8947e6..12700710f26d 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -834,8 +834,7 @@ mt7530_port_fdb_del(struct dsa_switch *ds, int port,
 
 static int
 mt7530_port_fdb_dump(struct dsa_switch *ds, int port,
-		     struct switchdev_obj_port_fdb *fdb,
-		     switchdev_obj_dump_cb_t *cb)
+		     dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct mt7530_priv *priv = ds->priv;
 	struct mt7530_fdb _fdb = { 0 };
@@ -853,11 +852,8 @@ mt7530_port_fdb_dump(struct dsa_switch *ds, int port,
 		if (rsp & ATC_SRCH_HIT) {
 			mt7530_fdb_read(priv, &_fdb);
 			if (_fdb.port_mask & BIT(port)) {
-				ether_addr_copy(fdb->addr, _fdb.mac);
-				fdb->vid = _fdb.vid;
-				fdb->ndm_state = _fdb.noarp ?
-						NUD_NOARP : NUD_REACHABLE;
-				ret = cb(&fdb->obj);
+				ret = cb(_fdb.mac, _fdb.vid, _fdb.noarp,
+					 data);
 				if (ret < 0)
 					break;
 			}
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 1f5c202b974d..918d8f0fe091 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1381,10 +1381,10 @@ static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
 
 static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
 				      u16 fid, u16 vid, int port,
-				      struct switchdev_obj *obj,
-				      switchdev_obj_dump_cb_t *cb)
+				      dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct mv88e6xxx_atu_entry addr;
+	bool is_static;
 	int err;
 
 	addr.state = MV88E6XXX_G1_ATU_DATA_STATE_UNUSED;
@@ -1401,24 +1401,12 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
 		if (addr.trunk || (addr.portvec & BIT(port)) == 0)
 			continue;
 
-		if (obj->id == SWITCHDEV_OBJ_ID_PORT_FDB) {
-			struct switchdev_obj_port_fdb *fdb;
-
-			if (!is_unicast_ether_addr(addr.mac))
-				continue;
-
-			fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
-			fdb->vid = vid;
-			ether_addr_copy(fdb->addr, addr.mac);
-			if (addr.state == MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC)
-				fdb->ndm_state = NUD_NOARP;
-			else
-				fdb->ndm_state = NUD_REACHABLE;
-		} else {
-			return -EOPNOTSUPP;
-		}
+		if (!is_unicast_ether_addr(addr.mac))
+			continue;
 
-		err = cb(obj);
+		is_static = (addr.state ==
+			     MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
+		err = cb(addr.mac, vid, is_static, data);
 		if (err)
 			return err;
 	} while (!is_broadcast_ether_addr(addr.mac));
@@ -1427,8 +1415,7 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
 }
 
 static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
-				  struct switchdev_obj *obj,
-				  switchdev_obj_dump_cb_t *cb)
+				  dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct mv88e6xxx_vtu_entry vlan = {
 		.vid = chip->info->max_vid,
@@ -1441,7 +1428,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 	if (err)
 		return err;
 
-	err = mv88e6xxx_port_db_dump_fid(chip, fid, 0, port, obj, cb);
+	err = mv88e6xxx_port_db_dump_fid(chip, fid, 0, port, cb, data);
 	if (err)
 		return err;
 
@@ -1455,7 +1442,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 			break;
 
 		err = mv88e6xxx_port_db_dump_fid(chip, vlan.fid, vlan.vid, port,
-						 obj, cb);
+						 cb, data);
 		if (err)
 			return err;
 	} while (vlan.vid < chip->info->max_vid);
@@ -1464,14 +1451,13 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 }
 
 static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
-				   struct switchdev_obj_port_fdb *fdb,
-				   switchdev_obj_dump_cb_t *cb)
+				   dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
 	mutex_lock(&chip->reg_lock);
-	err = mv88e6xxx_port_db_dump(chip, port, &fdb->obj, cb);
+	err = mv88e6xxx_port_db_dump(chip, port, cb, data);
 	mutex_unlock(&chip->reg_lock);
 
 	return err;
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index f8ef823349bc..17977f06cb98 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -801,27 +801,20 @@ qca8k_port_fdb_del(struct dsa_switch *ds, int port,
 
 static int
 qca8k_port_fdb_dump(struct dsa_switch *ds, int port,
-		    struct switchdev_obj_port_fdb *fdb,
-		    switchdev_obj_dump_cb_t *cb)
+		    dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	struct qca8k_fdb _fdb = { 0 };
 	int cnt = QCA8K_NUM_FDB_RECORDS;
+	bool is_static;
 	int ret = 0;
 
 	mutex_lock(&priv->reg_mutex);
 	while (cnt-- && !qca8k_fdb_next(priv, &_fdb, port)) {
 		if (!_fdb.aging)
 			break;
-
-		ether_addr_copy(fdb->addr, _fdb.mac);
-		fdb->vid = _fdb.vid;
-		if (_fdb.aging == QCA8K_ATU_STATUS_STATIC)
-			fdb->ndm_state = NUD_NOARP;
-		else
-			fdb->ndm_state = NUD_REACHABLE;
-
-		ret = cb(&fdb->obj);
+		is_static = (_fdb.aging == QCA8K_ATU_STATUS_STATIC);
+		ret = cb(_fdb.mac, _fdb.vid, is_static, data);
 		if (ret)
 			break;
 	}
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 4ef185997b6f..a4f66dbb4b7c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -272,6 +272,8 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds)
 		return ds->rtable[dst->cpu_dp->ds->index];
 }
 
+typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
+			      bool is_static, void *data);
 struct dsa_switch_ops {
 	/*
 	 * Legacy probing.
@@ -386,8 +388,7 @@ struct dsa_switch_ops {
 	int	(*port_fdb_del)(struct dsa_switch *ds, int port,
 				const unsigned char *addr, u16 vid);
 	int	(*port_fdb_dump)(struct dsa_switch *ds, int port,
-				 struct switchdev_obj_port_fdb *fdb,
-				  switchdev_obj_dump_cb_t *cb);
+				 dsa_fdb_dump_cb_t *cb, void *data);
 
 	/*
 	 * Multicast database
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 8ae9e3b6392e..d2637a685ca6 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -208,9 +208,6 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 			   struct net_device *dev, const unsigned char *addr,
 			   u16 vid);
-int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
-			    struct net_device *dev,
-			    struct net_device *filter_dev, int *idx);
 void switchdev_port_fwd_mark_set(struct net_device *dev,
 				 struct net_device *group_dev,
 				 bool joining);
@@ -309,15 +306,6 @@ static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 	return -EOPNOTSUPP;
 }
 
-static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
-					  struct netlink_callback *cb,
-					  struct net_device *dev,
-					  struct net_device *filter_dev,
-					  int *idx)
-{
-       return *idx;
-}
-
 static inline bool switchdev_port_same_parent_id(struct net_device *a,
 						 struct net_device *b)
 {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9c890de6e4dd..1debf9c42fc4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -133,8 +133,6 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
 		     u16 vid);
 int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
 		     u16 vid);
-int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
-		      switchdev_obj_dump_cb_t *cb);
 int dsa_port_mdb_add(struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb,
 		     struct switchdev_trans *trans);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 73787828953a..659676ba3f8b 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -173,17 +173,6 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
 	return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
 }
 
-int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
-		      switchdev_obj_dump_cb_t *cb)
-{
-	struct dsa_switch *ds = dp->ds;
-
-	if (ds->ops->port_fdb_dump)
-		return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
-
-	return -EOPNOTSUPP;
-}
-
 int dsa_port_mdb_add(struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb,
 		     struct switchdev_trans *trans)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 5807c905fd1d..8c79011c5a83 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -199,6 +199,83 @@ out:
 	return 0;
 }
 
+struct dsa_slave_dump_ctx {
+	struct net_device *dev;
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+	int idx;
+};
+
+static int
+dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid,
+			   bool is_static, void *data)
+{
+	struct dsa_slave_dump_ctx *dump = data;
+	u32 portid = NETLINK_CB(dump->cb->skb).portid;
+	u32 seq = dump->cb->nlh->nlmsg_seq;
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	if (dump->idx < dump->cb->args[2])
+		goto skip;
+
+	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
+			sizeof(*ndm), NLM_F_MULTI);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family  = AF_BRIDGE;
+	ndm->ndm_pad1    = 0;
+	ndm->ndm_pad2    = 0;
+	ndm->ndm_flags   = NTF_SELF;
+	ndm->ndm_type    = 0;
+	ndm->ndm_ifindex = dump->dev->ifindex;
+	ndm->ndm_state   = is_static ? NUD_NOARP : NUD_REACHABLE;
+
+	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr))
+		goto nla_put_failure;
+
+	if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid))
+		goto nla_put_failure;
+
+	nlmsg_end(dump->skb, nlh);
+
+skip:
+	dump->idx++;
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(dump->skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int
+dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+		   struct net_device *dev, struct net_device *filter_dev,
+		   int *idx)
+{
+	struct dsa_slave_dump_ctx dump = {
+		.dev = dev,
+		.skb = skb,
+		.cb = cb,
+		.idx = *idx,
+	};
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = p->dp;
+	struct dsa_switch *ds = dp->ds;
+	int err;
+
+	if (!ds->ops->port_fdb_dump)
+		return -EOPNOTSUPP;
+
+	err = ds->ops->port_fdb_dump(ds, dp->index,
+				     dsa_slave_port_fdb_do_dump,
+				     &dump);
+	*idx = dump.idx;
+	return err;
+}
+
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -287,26 +364,6 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
 	return err;
 }
 
-static int dsa_slave_port_obj_dump(struct net_device *dev,
-				   struct switchdev_obj *obj,
-				   switchdev_obj_dump_cb_t *cb)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_port *dp = p->dp;
-	int err;
-
-	switch (obj->id) {
-	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-
-	return err;
-}
-
 static int dsa_slave_port_attr_get(struct net_device *dev,
 				   struct switchdev_attr *attr)
 {
@@ -974,7 +1031,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_set_mac_address	= dsa_slave_set_mac_address,
 	.ndo_fdb_add		= dsa_legacy_fdb_add,
 	.ndo_fdb_del		= dsa_legacy_fdb_del,
-	.ndo_fdb_dump		= switchdev_port_fdb_dump,
+	.ndo_fdb_dump		= dsa_slave_fdb_dump,
 	.ndo_do_ioctl		= dsa_slave_ioctl,
 	.ndo_get_iflink		= dsa_slave_get_iflink,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -992,7 +1049,6 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = {
 	.switchdev_port_attr_set	= dsa_slave_port_attr_set,
 	.switchdev_port_obj_add		= dsa_slave_port_obj_add,
 	.switchdev_port_obj_del		= dsa_slave_port_obj_del,
-	.switchdev_port_obj_dump	= dsa_slave_port_obj_dump,
 };
 
 static struct device_type dsa_type = {
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 25dc67ef9d37..3d32981b9aa1 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1009,90 +1009,6 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
 
-struct switchdev_fdb_dump {
-	struct switchdev_obj_port_fdb fdb;
-	struct net_device *dev;
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
-	int idx;
-};
-
-static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
-{
-	struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
-	struct switchdev_fdb_dump *dump =
-		container_of(fdb, struct switchdev_fdb_dump, fdb);
-	u32 portid = NETLINK_CB(dump->cb->skb).portid;
-	u32 seq = dump->cb->nlh->nlmsg_seq;
-	struct nlmsghdr *nlh;
-	struct ndmsg *ndm;
-
-	if (dump->idx < dump->cb->args[2])
-		goto skip;
-
-	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
-			sizeof(*ndm), NLM_F_MULTI);
-	if (!nlh)
-		return -EMSGSIZE;
-
-	ndm = nlmsg_data(nlh);
-	ndm->ndm_family  = AF_BRIDGE;
-	ndm->ndm_pad1    = 0;
-	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags   = NTF_SELF;
-	ndm->ndm_type    = 0;
-	ndm->ndm_ifindex = dump->dev->ifindex;
-	ndm->ndm_state   = fdb->ndm_state;
-
-	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
-		goto nla_put_failure;
-
-	if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
-		goto nla_put_failure;
-
-	nlmsg_end(dump->skb, nlh);
-
-skip:
-	dump->idx++;
-	return 0;
-
-nla_put_failure:
-	nlmsg_cancel(dump->skb, nlh);
-	return -EMSGSIZE;
-}
-
-/**
- *	switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
- *
- *	@skb: netlink skb
- *	@cb: netlink callback
- *	@dev: port device
- *	@filter_dev: filter device
- *	@idx:
- *
- *	Dump FDB entries from switch device.
- */
-int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
-			    struct net_device *dev,
-			    struct net_device *filter_dev, int *idx)
-{
-	struct switchdev_fdb_dump dump = {
-		.fdb.obj.orig_dev = dev,
-		.fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
-		.dev = dev,
-		.skb = skb,
-		.cb = cb,
-		.idx = *idx,
-	};
-	int err;
-
-	err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
-				      switchdev_port_fdb_dump_cb);
-	*idx = dump.idx;
-	return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
-
 bool switchdev_port_same_parent_id(struct net_device *a,
 				   struct net_device *b)
 {
-- 
cgit v1.2.3


From 29ab586c3d83f81c435e269cace9a1619afb5bbd Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Sun, 6 Aug 2017 16:15:51 +0300
Subject: net: switchdev: Remove bridge bypass support from switchdev

Currently the bridge port flags, vlans, FDBs and MDBs can be offloaded
through the bridge code, making the switchdev's SELF bridge bypass
implementation to be redundant. This implies several changes:
- No need for dump infra in switchdev, DSA's special case is handled
  privately.
- Remove obj_dump from switchdev_ops.
- FDBs are removed from obj_add/del routines, due to the fact that they
  are offloaded through the bridge notification chain.
- The switchdev_port_bridge_xx() and switchdev_port_fdb_xx() functions
  can be removed.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ivan Vecera <ivecera@redhat.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h   |  75 --------
 net/switchdev/switchdev.c | 435 ----------------------------------------------
 2 files changed, 510 deletions(-)

(limited to 'include/net')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d2637a685ca6..d767b7991887 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -74,7 +74,6 @@ struct switchdev_attr {
 enum switchdev_obj_id {
 	SWITCHDEV_OBJ_ID_UNDEFINED,
 	SWITCHDEV_OBJ_ID_PORT_VLAN,
-	SWITCHDEV_OBJ_ID_PORT_FDB,
 	SWITCHDEV_OBJ_ID_PORT_MDB,
 };
 
@@ -97,17 +96,6 @@ struct switchdev_obj_port_vlan {
 #define SWITCHDEV_OBJ_PORT_VLAN(obj) \
 	container_of(obj, struct switchdev_obj_port_vlan, obj)
 
-/* SWITCHDEV_OBJ_ID_PORT_FDB */
-struct switchdev_obj_port_fdb {
-	struct switchdev_obj obj;
-	unsigned char addr[ETH_ALEN];
-	u16 vid;
-	u16 ndm_state;
-};
-
-#define SWITCHDEV_OBJ_PORT_FDB(obj) \
-	container_of(obj, struct switchdev_obj_port_fdb, obj)
-
 /* SWITCHDEV_OBJ_ID_PORT_MDB */
 struct switchdev_obj_port_mdb {
 	struct switchdev_obj obj;
@@ -135,8 +123,6 @@ typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
  * @switchdev_port_obj_add: Add an object to port (see switchdev_obj_*).
  *
  * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj_*).
- *
- * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj_*).
  */
 struct switchdev_ops {
 	int	(*switchdev_port_attr_get)(struct net_device *dev,
@@ -149,9 +135,6 @@ struct switchdev_ops {
 					  struct switchdev_trans *trans);
 	int	(*switchdev_port_obj_del)(struct net_device *dev,
 					  const struct switchdev_obj *obj);
-	int	(*switchdev_port_obj_dump)(struct net_device *dev,
-					   struct switchdev_obj *obj,
-					   switchdev_obj_dump_cb_t *cb);
 };
 
 enum switchdev_notifier_type {
@@ -189,25 +172,10 @@ int switchdev_port_obj_add(struct net_device *dev,
 			   const struct switchdev_obj *obj);
 int switchdev_port_obj_del(struct net_device *dev,
 			   const struct switchdev_obj *obj);
-int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
-			    switchdev_obj_dump_cb_t *cb);
 int register_switchdev_notifier(struct notifier_block *nb);
 int unregister_switchdev_notifier(struct notifier_block *nb);
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
 			     struct switchdev_notifier_info *info);
-int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-				  struct net_device *dev, u32 filter_mask,
-				  int nlflags);
-int switchdev_port_bridge_setlink(struct net_device *dev,
-				  struct nlmsghdr *nlh, u16 flags);
-int switchdev_port_bridge_dellink(struct net_device *dev,
-				  struct nlmsghdr *nlh, u16 flags);
-int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-			   struct net_device *dev, const unsigned char *addr,
-			   u16 vid, u16 nlm_flags);
-int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-			   struct net_device *dev, const unsigned char *addr,
-			   u16 vid);
 void switchdev_port_fwd_mark_set(struct net_device *dev,
 				 struct net_device *group_dev,
 				 bool joining);
@@ -246,13 +214,6 @@ static inline int switchdev_port_obj_del(struct net_device *dev,
 	return -EOPNOTSUPP;
 }
 
-static inline int switchdev_port_obj_dump(struct net_device *dev,
-					  const struct switchdev_obj *obj,
-					  switchdev_obj_dump_cb_t *cb)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline int register_switchdev_notifier(struct notifier_block *nb)
 {
 	return 0;
@@ -270,42 +231,6 @@ static inline int call_switchdev_notifiers(unsigned long val,
 	return NOTIFY_DONE;
 }
 
-static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid,
-					    u32 seq, struct net_device *dev,
-					    u32 filter_mask, int nlflags)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int switchdev_port_bridge_setlink(struct net_device *dev,
-						struct nlmsghdr *nlh,
-						u16 flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int switchdev_port_bridge_dellink(struct net_device *dev,
-						struct nlmsghdr *nlh,
-						u16 flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-					 struct net_device *dev,
-					 const unsigned char *addr,
-					 u16 vid, u16 nlm_flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-					 struct net_device *dev,
-					 const unsigned char *addr, u16 vid)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline bool switchdev_port_same_parent_id(struct net_device *a,
 						 struct net_device *b)
 {
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 3d32981b9aa1..0531b41d1f2d 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -343,8 +343,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		return sizeof(struct switchdev_obj_port_vlan);
-	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		return sizeof(struct switchdev_obj_port_fdb);
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
 		return sizeof(struct switchdev_obj_port_mdb);
 	default:
@@ -534,43 +532,6 @@ int switchdev_port_obj_del(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
 
-/**
- *	switchdev_port_obj_dump - Dump port objects
- *
- *	@dev: port device
- *	@id: object ID
- *	@obj: object to dump
- *	@cb: function to call with a filled object
- *
- *	rtnl_lock must be held.
- */
-int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
-			    switchdev_obj_dump_cb_t *cb)
-{
-	const struct switchdev_ops *ops = dev->switchdev_ops;
-	struct net_device *lower_dev;
-	struct list_head *iter;
-	int err = -EOPNOTSUPP;
-
-	ASSERT_RTNL();
-
-	if (ops && ops->switchdev_port_obj_dump)
-		return ops->switchdev_port_obj_dump(dev, obj, cb);
-
-	/* Switch device port(s) may be stacked under
-	 * bond/team/vlan dev, so recurse down to dump objects on
-	 * first port at bottom of stack.
-	 */
-
-	netdev_for_each_lower_dev(dev, lower_dev, iter) {
-		err = switchdev_port_obj_dump(lower_dev, obj, cb);
-		break;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
-
 static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
 
 /**
@@ -613,402 +574,6 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
 
-struct switchdev_vlan_dump {
-	struct switchdev_obj_port_vlan vlan;
-	struct sk_buff *skb;
-	u32 filter_mask;
-	u16 flags;
-	u16 begin;
-	u16 end;
-};
-
-static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
-{
-	struct bridge_vlan_info vinfo;
-
-	vinfo.flags = dump->flags;
-
-	if (dump->begin == 0 && dump->end == 0) {
-		return 0;
-	} else if (dump->begin == dump->end) {
-		vinfo.vid = dump->begin;
-		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
-			    sizeof(vinfo), &vinfo))
-			return -EMSGSIZE;
-	} else {
-		vinfo.vid = dump->begin;
-		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
-		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
-			    sizeof(vinfo), &vinfo))
-			return -EMSGSIZE;
-		vinfo.vid = dump->end;
-		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
-		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
-		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
-			    sizeof(vinfo), &vinfo))
-			return -EMSGSIZE;
-	}
-
-	return 0;
-}
-
-static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
-{
-	struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
-	struct switchdev_vlan_dump *dump =
-		container_of(vlan, struct switchdev_vlan_dump, vlan);
-	int err = 0;
-
-	if (vlan->vid_begin > vlan->vid_end)
-		return -EINVAL;
-
-	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
-		dump->flags = vlan->flags;
-		for (dump->begin = dump->end = vlan->vid_begin;
-		     dump->begin <= vlan->vid_end;
-		     dump->begin++, dump->end++) {
-			err = switchdev_port_vlan_dump_put(dump);
-			if (err)
-				return err;
-		}
-	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
-		if (dump->begin > vlan->vid_begin &&
-		    dump->begin >= vlan->vid_end) {
-			if ((dump->begin - 1) == vlan->vid_end &&
-			    dump->flags == vlan->flags) {
-				/* prepend */
-				dump->begin = vlan->vid_begin;
-			} else {
-				err = switchdev_port_vlan_dump_put(dump);
-				dump->flags = vlan->flags;
-				dump->begin = vlan->vid_begin;
-				dump->end = vlan->vid_end;
-			}
-		} else if (dump->end <= vlan->vid_begin &&
-		           dump->end < vlan->vid_end) {
-			if ((dump->end  + 1) == vlan->vid_begin &&
-			    dump->flags == vlan->flags) {
-				/* append */
-				dump->end = vlan->vid_end;
-			} else {
-				err = switchdev_port_vlan_dump_put(dump);
-				dump->flags = vlan->flags;
-				dump->begin = vlan->vid_begin;
-				dump->end = vlan->vid_end;
-			}
-		} else {
-			err = -EINVAL;
-		}
-	}
-
-	return err;
-}
-
-static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
-				    u32 filter_mask)
-{
-	struct switchdev_vlan_dump dump = {
-		.vlan.obj.orig_dev = dev,
-		.vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
-		.skb = skb,
-		.filter_mask = filter_mask,
-	};
-	int err = 0;
-
-	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
-	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
-		err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
-					      switchdev_port_vlan_dump_cb);
-		if (err)
-			goto err_out;
-		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
-			/* last one */
-			err = switchdev_port_vlan_dump_put(&dump);
-	}
-
-err_out:
-	return err == -EOPNOTSUPP ? 0 : err;
-}
-
-/**
- *	switchdev_port_bridge_getlink - Get bridge port attributes
- *
- *	@dev: port device
- *
- *	Called for SELF on rtnl_bridge_getlink to get bridge port
- *	attributes.
- */
-int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-				  struct net_device *dev, u32 filter_mask,
-				  int nlflags)
-{
-	struct switchdev_attr attr = {
-		.orig_dev = dev,
-		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
-	};
-	u16 mode = BRIDGE_MODE_UNDEF;
-	u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
-	int err;
-
-	if (!netif_is_bridge_port(dev))
-		return -EOPNOTSUPP;
-
-	err = switchdev_port_attr_get(dev, &attr);
-	if (err && err != -EOPNOTSUPP)
-		return err;
-
-	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
-				       attr.u.brport_flags, mask, nlflags,
-				       filter_mask, switchdev_port_vlan_fill);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
-
-static int switchdev_port_br_setflag(struct net_device *dev,
-				     struct nlattr *nlattr,
-				     unsigned long brport_flag)
-{
-	struct switchdev_attr attr = {
-		.orig_dev = dev,
-		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
-	};
-	u8 flag = nla_get_u8(nlattr);
-	int err;
-
-	err = switchdev_port_attr_get(dev, &attr);
-	if (err)
-		return err;
-
-	if (flag)
-		attr.u.brport_flags |= brport_flag;
-	else
-		attr.u.brport_flags &= ~brport_flag;
-
-	return switchdev_port_attr_set(dev, &attr);
-}
-
-static const struct nla_policy
-switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
-	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
-	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
-	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
-	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
-	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
-};
-
-static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
-					      struct nlattr *protinfo)
-{
-	struct nlattr *attr;
-	int rem;
-	int err;
-
-	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
-				  switchdev_port_bridge_policy, NULL);
-	if (err)
-		return err;
-
-	nla_for_each_nested(attr, protinfo, rem) {
-		switch (nla_type(attr)) {
-		case IFLA_BRPORT_LEARNING:
-			err = switchdev_port_br_setflag(dev, attr,
-							BR_LEARNING);
-			break;
-		case IFLA_BRPORT_LEARNING_SYNC:
-			err = switchdev_port_br_setflag(dev, attr,
-							BR_LEARNING_SYNC);
-			break;
-		case IFLA_BRPORT_UNICAST_FLOOD:
-			err = switchdev_port_br_setflag(dev, attr, BR_FLOOD);
-			break;
-		default:
-			err = -EOPNOTSUPP;
-			break;
-		}
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int switchdev_port_br_afspec(struct net_device *dev,
-				    struct nlattr *afspec,
-				    int (*f)(struct net_device *dev,
-					     const struct switchdev_obj *obj))
-{
-	struct nlattr *attr;
-	struct bridge_vlan_info *vinfo;
-	struct switchdev_obj_port_vlan vlan = {
-		.obj.orig_dev = dev,
-		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
-	};
-	int rem;
-	int err;
-
-	nla_for_each_nested(attr, afspec, rem) {
-		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
-			continue;
-		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
-			return -EINVAL;
-		vinfo = nla_data(attr);
-		if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
-			return -EINVAL;
-		vlan.flags = vinfo->flags;
-		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
-			if (vlan.vid_begin)
-				return -EINVAL;
-			vlan.vid_begin = vinfo->vid;
-			/* don't allow range of pvids */
-			if (vlan.flags & BRIDGE_VLAN_INFO_PVID)
-				return -EINVAL;
-		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
-			if (!vlan.vid_begin)
-				return -EINVAL;
-			vlan.vid_end = vinfo->vid;
-			if (vlan.vid_end <= vlan.vid_begin)
-				return -EINVAL;
-			err = f(dev, &vlan.obj);
-			if (err)
-				return err;
-			vlan.vid_begin = 0;
-		} else {
-			if (vlan.vid_begin)
-				return -EINVAL;
-			vlan.vid_begin = vinfo->vid;
-			vlan.vid_end = vinfo->vid;
-			err = f(dev, &vlan.obj);
-			if (err)
-				return err;
-			vlan.vid_begin = 0;
-		}
-	}
-
-	return 0;
-}
-
-/**
- *	switchdev_port_bridge_setlink - Set bridge port attributes
- *
- *	@dev: port device
- *	@nlh: netlink header
- *	@flags: netlink flags
- *
- *	Called for SELF on rtnl_bridge_setlink to set bridge port
- *	attributes.
- */
-int switchdev_port_bridge_setlink(struct net_device *dev,
-				  struct nlmsghdr *nlh, u16 flags)
-{
-	struct nlattr *protinfo;
-	struct nlattr *afspec;
-	int err = 0;
-
-	if (!netif_is_bridge_port(dev))
-		return -EOPNOTSUPP;
-
-	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
-				   IFLA_PROTINFO);
-	if (protinfo) {
-		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
-		if (err)
-			return err;
-	}
-
-	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
-				 IFLA_AF_SPEC);
-	if (afspec)
-		err = switchdev_port_br_afspec(dev, afspec,
-					       switchdev_port_obj_add);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
-
-/**
- *	switchdev_port_bridge_dellink - Set bridge port attributes
- *
- *	@dev: port device
- *	@nlh: netlink header
- *	@flags: netlink flags
- *
- *	Called for SELF on rtnl_bridge_dellink to set bridge port
- *	attributes.
- */
-int switchdev_port_bridge_dellink(struct net_device *dev,
-				  struct nlmsghdr *nlh, u16 flags)
-{
-	struct nlattr *afspec;
-
-	if (!netif_is_bridge_port(dev))
-		return -EOPNOTSUPP;
-
-	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
-				 IFLA_AF_SPEC);
-	if (afspec)
-		return switchdev_port_br_afspec(dev, afspec,
-						switchdev_port_obj_del);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
-
-/**
- *	switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
- *
- *	@ndmsg: netlink hdr
- *	@nlattr: netlink attributes
- *	@dev: port device
- *	@addr: MAC address to add
- *	@vid: VLAN to add
- *
- *	Add FDB entry to switch device.
- */
-int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-			   struct net_device *dev, const unsigned char *addr,
-			   u16 vid, u16 nlm_flags)
-{
-	struct switchdev_obj_port_fdb fdb = {
-		.obj.orig_dev = dev,
-		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
-		.vid = vid,
-	};
-
-	ether_addr_copy(fdb.addr, addr);
-	return switchdev_port_obj_add(dev, &fdb.obj);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
-
-/**
- *	switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
- *
- *	@ndmsg: netlink hdr
- *	@nlattr: netlink attributes
- *	@dev: port device
- *	@addr: MAC address to delete
- *	@vid: VLAN to delete
- *
- *	Delete FDB entry from switch device.
- */
-int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-			   struct net_device *dev, const unsigned char *addr,
-			   u16 vid)
-{
-	struct switchdev_obj_port_fdb fdb = {
-		.obj.orig_dev = dev,
-		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
-		.vid = vid,
-	};
-
-	ether_addr_copy(fdb.addr, addr);
-	return switchdev_port_obj_del(dev, &fdb.obj);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
-
 bool switchdev_port_same_parent_id(struct net_device *a,
 				   struct net_device *b)
 {
-- 
cgit v1.2.3


From feca7d8c135bc1527b244fe817b8b6498066ccec Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Tue, 8 Aug 2017 20:23:49 +0200
Subject: net: ipv6: avoid overhead when no custom FIB rules are installed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the user hasn't installed any custom rules, don't go through the
whole FIB rules layer. This is pretty similar to f4530fa574df (ipv4:
Avoid overhead when no custom FIB rules are installed).

Using a micro-benchmark module [1], timing ip6_route_output() with
get_cycles(), with 40,000 routes in the main routing table, before this
patch:

    min=606 max=12911 count=627 average=1959 95th=4903 90th=3747 50th=1602 mad=821
    table=254 avgdepth=21.8 maxdepth=39
    value │                         ┊                            count
      600 │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒                                         199
      880 │▒▒▒░░░░░░░░░░░░░░░░                                      43
     1160 │▒▒▒░░░░░░░░░░░░░░░░░░░░                                  48
     1440 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░                               43
     1720 │▒▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░                          59
     2000 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                      50
     2280 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                    26
     2560 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                  31
     2840 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░               28
     3120 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░              17
     3400 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░             17
     3680 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░             8
     3960 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░           11
     4240 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░            6
     4520 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░           6
     4800 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░           9

After:

    min=544 max=11687 count=627 average=1776 95th=4546 90th=3585 50th=1227 mad=565
    table=254 avgdepth=21.8 maxdepth=39
    value │                         ┊                            count
      540 │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒                                        201
      800 │▒▒▒▒▒░░░░░░░░░░░░░░░░                                    63
     1060 │▒▒▒▒▒░░░░░░░░░░░░░░░░░░░░░                               68
     1320 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░                            39
     1580 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                         32
     1840 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                       32
     2100 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                    34
     2360 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                 33
     2620 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░               26
     2880 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░              22
     3140 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░              9
     3400 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░             8
     3660 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░             9
     3920 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░            8
     4180 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░           8
     4440 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░           8

At the frequency of the host during the bench (~ 3.7 GHz), this is
about a 100 ns difference on the median value.

A next step would be to collapse local and main tables, as in
0ddcf43d5d4a (ipv4: FIB Local/MAIN table collapse).

[1]: https://github.com/vincentbernat/network-lab/blob/master/lab-routes-ipv6/kbench_mod.c

Signed-off-by: Vincent Bernat <vincent@bernat.im>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h |  1 +
 net/ipv6/fib6_rules.c    | 40 +++++++++++++++++++++++++++-------------
 net/ipv6/route.c         |  1 +
 3 files changed, 29 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index abdf3b40303b..0e50bf3ed097 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -65,6 +65,7 @@ struct netns_ipv6 {
 	unsigned int		 ip6_rt_gc_expire;
 	unsigned long		 ip6_rt_last_gc;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	bool			 fib6_has_custom_rules;
 	struct rt6_info         *ip6_prohibit_entry;
 	struct rt6_info         *ip6_blk_hole_entry;
 	struct fib6_table       *fib6_local_tbl;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2f29e4e33bd3..b240f24a6e52 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -63,19 +63,32 @@ unsigned int fib6_rules_seq_read(struct net *net)
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
-	struct fib_lookup_arg arg = {
-		.lookup_ptr = lookup,
-		.flags = FIB_LOOKUP_NOREF,
-	};
-
-	/* update flow if oif or iif point to device enslaved to l3mdev */
-	l3mdev_update_flow(net, flowi6_to_flowi(fl6));
-
-	fib_rules_lookup(net->ipv6.fib6_rules_ops,
-			 flowi6_to_flowi(fl6), flags, &arg);
-
-	if (arg.result)
-		return arg.result;
+	if (net->ipv6.fib6_has_custom_rules) {
+		struct fib_lookup_arg arg = {
+			.lookup_ptr = lookup,
+			.flags = FIB_LOOKUP_NOREF,
+		};
+
+		/* update flow if oif or iif point to device enslaved to l3mdev */
+		l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
+		fib_rules_lookup(net->ipv6.fib6_rules_ops,
+				 flowi6_to_flowi(fl6), flags, &arg);
+
+		if (arg.result)
+			return arg.result;
+	} else {
+		struct rt6_info *rt;
+
+		rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+		if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
+			return &rt->dst;
+		ip6_rt_put(rt);
+		rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+		if (rt->dst.error != -EAGAIN)
+			return &rt->dst;
+		ip6_rt_put(rt);
+	}
 
 	dst_hold(&net->ipv6.ip6_null_entry->dst);
 	return &net->ipv6.ip6_null_entry->dst;
@@ -245,6 +258,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule6->dst.plen = frh->dst_len;
 	rule6->tclass = frh->tos;
 
+	net->ipv6.fib6_has_custom_rules = true;
 	err = 0;
 errout:
 	return err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aba07fce67fb..7ecbe5eb19f8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3934,6 +3934,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 			 ip6_template_metrics, true);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	net->ipv6.fib6_has_custom_rules = false;
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
 					       sizeof(*net->ipv6.ip6_prohibit_entry),
 					       GFP_KERNEL);
-- 
cgit v1.2.3


From b97bac64a589d0158cf866e8995e831030f68f4f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 9 Aug 2017 20:41:48 +0200
Subject: rtnetlink: make rtnl_register accept a flags parameter

This change allows us to later indicate to rtnetlink core that certain
doit functions should be called without acquiring rtnl_mutex.

This change should have no effect, we simply replace the last (now
unused) calcit argument with the new flag.

Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rtnetlink.h  |  5 ++---
 net/bridge/br_mdb.c      |  6 +++---
 net/can/gw.c             |  6 +++---
 net/core/fib_rules.c     |  6 +++---
 net/core/neighbour.c     | 10 +++++-----
 net/core/net_namespace.c |  4 ++--
 net/core/rtnetlink.c     | 36 ++++++++++++++++++------------------
 net/dcb/dcbnl.c          |  4 ++--
 net/decnet/dn_dev.c      |  6 +++---
 net/decnet/dn_fib.c      |  4 ++--
 net/decnet/dn_route.c    |  4 ++--
 net/ipv4/devinet.c       |  8 ++++----
 net/ipv4/fib_frontend.c  |  6 +++---
 net/ipv4/ipmr.c          |  8 ++++----
 net/ipv4/route.c         |  2 +-
 net/ipv6/addrconf.c      | 14 +++++++-------
 net/ipv6/addrlabel.c     |  6 +++---
 net/ipv6/ip6_fib.c       |  2 +-
 net/ipv6/ip6mr.c         |  2 +-
 net/ipv6/route.c         |  6 +++---
 net/mpls/af_mpls.c       |  8 ++++----
 net/phonet/pn_netlink.c  | 12 ++++++------
 net/qrtr/qrtr.c          |  2 +-
 net/sched/act_api.c      |  6 +++---
 net/sched/cls_api.c      |  6 +++---
 net/sched/sch_api.c      | 12 ++++++------
 26 files changed, 95 insertions(+), 96 deletions(-)

(limited to 'include/net')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index abe6b733d473..ac32460a0adb 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -7,12 +7,11 @@
 typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
 			      struct netlink_ext_ack *);
 typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
-typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *);
 
 int __rtnl_register(int protocol, int msgtype,
-		    rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func);
+		    rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
 void rtnl_register(int protocol, int msgtype,
-		   rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func);
+		   rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
 int rtnl_unregister(int protocol, int msgtype);
 void rtnl_unregister_all(int protocol);
 
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index a0b11e7d67d9..ca01def49af0 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -713,9 +713,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 void br_mdb_init(void)
 {
-	rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
-	rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
-	rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
+	rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
+	rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
+	rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
 }
 
 void br_mdb_uninit(void)
diff --git a/net/can/gw.c b/net/can/gw.c
index 29748d844c3f..73a02af4b5d7 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1031,15 +1031,15 @@ static __init int cgw_module_init(void)
 	notifier.notifier_call = cgw_notifier;
 	register_netdevice_notifier(&notifier);
 
-	if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) {
+	if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) {
 		unregister_netdevice_notifier(&notifier);
 		kmem_cache_destroy(cgw_cache);
 		return -ENOBUFS;
 	}
 
 	/* Only the first call to __rtnl_register can fail */
-	__rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL);
-	__rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL);
+	__rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0);
+	__rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0);
 
 	return 0;
 }
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fc0b65093417..9a6d97c1d810 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1026,9 +1026,9 @@ static struct pernet_operations fib_rules_net_ops = {
 static int __init fib_rules_init(void)
 {
 	int err;
-	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0);
 
 	err = register_pernet_subsys(&fib_rules_net_ops);
 	if (err < 0)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d0713627deb6..16a1a4c4eb57 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3261,13 +3261,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
 
 static int __init neigh_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
 
 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
-		      NULL);
-	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
+		      0);
+	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
 
 	return 0;
 }
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 8726d051f31d..a7f06d706aa0 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -855,9 +855,9 @@ static int __init net_ns_init(void)
 
 	register_pernet_subsys(&net_ns_ops);
 
-	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
-		      NULL);
+		      0);
 
 	return 0;
 }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8c9d34deea7d..67607c540c03 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -178,7 +178,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
  * @msgtype: rtnetlink message type
  * @doit: Function pointer called for each request message
  * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @calcit: Function pointer to calc size of dump message
+ * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
  *
  * Registers the specified function pointers (at least one of them has
  * to be non-NULL) to be called whenever a request message for the
@@ -192,7 +192,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
  */
 int __rtnl_register(int protocol, int msgtype,
 		    rtnl_doit_func doit, rtnl_dumpit_func dumpit,
-		    rtnl_calcit_func calcit)
+		    unsigned int flags)
 {
 	struct rtnl_link *tab;
 	int msgindex;
@@ -230,9 +230,9 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
  */
 void rtnl_register(int protocol, int msgtype,
 		   rtnl_doit_func doit, rtnl_dumpit_func dumpit,
-		   rtnl_calcit_func calcit)
+		   unsigned int flags)
 {
-	if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0)
+	if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0)
 		panic("Unable to register rtnetlink message handler, "
 		      "protocol = %d, message type = %d\n",
 		      protocol, msgtype);
@@ -4279,23 +4279,23 @@ void __init rtnetlink_init(void)
 	register_netdevice_notifier(&rtnetlink_dev_notifier);
 
 	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
-		      rtnl_dump_ifinfo, NULL);
-	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL);
+		      rtnl_dump_ifinfo, 0);
+	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
 
-	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
 
-	rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
-	rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
-	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
+	rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
+	rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
+	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0);
 
-	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
-	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
-	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
+	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
+	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
+	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0);
 
 	rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
-		      NULL);
+		      0);
 }
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 733f523707ac..bae7d78aa068 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1938,8 +1938,8 @@ static int __init dcbnl_init(void)
 {
 	INIT_LIST_HEAD(&dcb_app_list);
 
-	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
 
 	return 0;
 }
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index fa0110b57ca1..4d339de56862 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1419,9 +1419,9 @@ void __init dn_dev_init(void)
 
 	dn_dev_devices_on();
 
-	rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, NULL);
-	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL);
-	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL);
+	rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0);
+	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0);
+	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0);
 
 	proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
 
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index f9f6fb3f3c5b..3d37464c8b4a 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -791,8 +791,8 @@ void __init dn_fib_init(void)
 
 	register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
 
-	rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, NULL);
-	rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0);
+	rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0);
 }
 
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index bcbe548f8854..0bd3afd01dd2 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1922,10 +1922,10 @@ void __init dn_route_init(void)
 
 #ifdef CONFIG_DECNET_ROUTER
 	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
-		      dn_fib_dump, NULL);
+		      dn_fib_dump, 0);
 #else
 	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
-		      dn_cache_dump, NULL);
+		      dn_cache_dump, 0);
 #endif
 }
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 38d9af9b917c..d7adc0616599 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2491,9 +2491,9 @@ void __init devinet_init(void)
 
 	rtnl_af_register(&inet_af_ops);
 
-	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
-	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
-	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
+	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
+	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
+	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
-		      inet_netconf_dump_devconf, NULL);
+		      inet_netconf_dump_devconf, 0);
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 2cba559f14df..37819ab4cc74 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1348,7 +1348,7 @@ void __init ip_fib_init(void)
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
 
-	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
-	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
-	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
+	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
+	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0);
 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 06863ea3fc5b..c9b3e6e069ae 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3114,14 +3114,14 @@ int __init ip_mr_init(void)
 	}
 #endif
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
-		      ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL);
+		      ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
-		      ipmr_rtm_route, NULL, NULL);
+		      ipmr_rtm_route, NULL, 0);
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
-		      ipmr_rtm_route, NULL, NULL);
+		      ipmr_rtm_route, NULL, 0);
 
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
-		      NULL, ipmr_rtm_dumplink, NULL);
+		      NULL, ipmr_rtm_dumplink, 0);
 	return 0;
 
 #ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0383e66f59bc..2ef46294475f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3067,7 +3067,7 @@ int __init ip_rt_init(void)
 	xfrm_init();
 	xfrm4_init();
 #endif
-	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, 0);
 
 #ifdef CONFIG_SYSCTL
 	register_pernet_subsys(&sysctl_route_ops);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 30ee23eef268..640792e1ecb7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6605,21 +6605,21 @@ int __init addrconf_init(void)
 	rtnl_af_register(&inet6_ops);
 
 	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
-			      NULL);
+			      0);
 	if (err < 0)
 		goto errout;
 
 	/* Only the first call to __rtnl_register can fail */
-	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
-	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
+	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
 	__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
-			inet6_dump_ifaddr, NULL);
+			inet6_dump_ifaddr, 0);
 	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
-			inet6_dump_ifmcaddr, NULL);
+			inet6_dump_ifmcaddr, 0);
 	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
-			inet6_dump_ifacaddr, NULL);
+			inet6_dump_ifacaddr, 0);
 	__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
-			inet6_netconf_dump_devconf, NULL);
+			inet6_netconf_dump_devconf, 0);
 
 	ipv6_addr_label_rtnl_register();
 
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 7a428f65c7ec..cea5eb488013 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -593,10 +593,10 @@ out:
 void __init ipv6_addr_label_rtnl_register(void)
 {
 	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
-			NULL, NULL);
+			NULL, 0);
 	__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
-			NULL, NULL);
+			NULL, 0);
 	__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
-			ip6addrlbl_dump, NULL);
+			ip6addrlbl_dump, 0);
 }
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 69ed0043d117..8c58c7558de0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2038,7 +2038,7 @@ int __init fib6_init(void)
 		goto out_kmem_cache_create;
 
 	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
-			      NULL);
+			      0);
 	if (ret)
 		goto out_unregister_subsys;
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7454850f2098..f5500f5444e9 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1427,7 +1427,7 @@ int __init ip6_mr_init(void)
 	}
 #endif
 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
-		      ip6mr_rtm_dumproute, NULL);
+		      ip6mr_rtm_dumproute, 0);
 	return 0;
 #ifdef CONFIG_IPV6_PIMSM_V2
 add_proto_fail:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c73e61750642..035762fed07d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4105,9 +4105,9 @@ int __init ip6_route_init(void)
 		goto fib6_rules_init;
 
 	ret = -ENOBUFS;
-	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
-	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
-	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
+	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
+	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
+	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, 0))
 		goto out_register_late_subsys;
 
 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index ea4f481839dd..c5b9ce41d66f 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2479,12 +2479,12 @@ static int __init mpls_init(void)
 
 	rtnl_af_register(&mpls_af_ops);
 
-	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
-	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0);
+	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0);
 	rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
-		      NULL);
+		      0);
 	rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
-		      mpls_netconf_dump_devconf, NULL);
+		      mpls_netconf_dump_devconf, 0);
 	err = 0;
 out:
 	return err;
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 45b3af3080d8..da754fc926e7 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -300,15 +300,15 @@ out:
 int __init phonet_netlink_register(void)
 {
 	int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit,
-				  NULL, NULL);
+				  NULL, 0);
 	if (err)
 		return err;
 
 	/* Further __rtnl_register() cannot fail */
-	__rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, NULL);
-	__rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, NULL);
-	__rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, NULL);
-	__rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, NULL);
-	__rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, NULL);
+	__rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0);
+	__rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0);
+	__rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0);
+	__rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0);
+	__rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0);
 	return 0;
 }
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 5586609afa27..c2f5c13550c0 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1081,7 +1081,7 @@ static int __init qrtr_proto_init(void)
 		return rc;
 	}
 
-	rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL);
+	rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0);
 
 	return 0;
 }
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a2915d958279..02fcb0c78a28 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1255,10 +1255,10 @@ out_module_put:
 
 static int __init tc_action_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
-		      NULL);
+		      0);
 
 	return 0;
 }
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8d1157aebaf7..ebeeb87e6d44 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1010,10 +1010,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev);
 
 static int __init tc_filter_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
-		      tc_dump_tfilter, NULL);
+		      tc_dump_tfilter, 0);
 
 	return 0;
 }
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bd24a550e0f9..816c8092e601 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1952,14 +1952,14 @@ static int __init pktsched_init(void)
 	register_qdisc(&mq_qdisc_ops);
 	register_qdisc(&noqueue_qdisc_ops);
 
-	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
-		      NULL);
-	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
+		      0);
+	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
-		      NULL);
+		      0);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 62256f98f244fbb1c7a10465e1ee412f209d8978 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 9 Aug 2017 20:41:52 +0200
Subject: rtnetlink: add RTNL_FLAG_DOIT_UNLOCKED

Allow callers to tell rtnetlink core that its doit callback
should be invoked without holding rtnl mutex.

Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rtnetlink.h |  4 ++++
 net/core/rtnetlink.c    | 15 +++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/net')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index ac32460a0adb..21837ca68ecc 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -8,6 +8,10 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
 			      struct netlink_ext_ack *);
 typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
 
+enum rtnl_link_flags {
+	RTNL_FLAG_DOIT_UNLOCKED = 1,
+};
+
 int __rtnl_register(int protocol, int msgtype,
 		    rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
 void rtnl_register(int protocol, int msgtype,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d45946177bc8..dd4e50dfa248 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -62,6 +62,7 @@
 struct rtnl_link {
 	rtnl_doit_func		doit;
 	rtnl_dumpit_func	dumpit;
+	unsigned int		flags;
 };
 
 static DEFINE_MUTEX(rtnl_mutex);
@@ -184,6 +185,7 @@ int __rtnl_register(int protocol, int msgtype,
 		tab[msgindex].doit = doit;
 	if (dumpit)
 		tab[msgindex].dumpit = dumpit;
+	tab[msgindex].flags |= flags;
 
 	return 0;
 }
@@ -233,6 +235,7 @@ int rtnl_unregister(int protocol, int msgtype)
 
 	handlers[msgindex].doit = NULL;
 	handlers[msgindex].dumpit = NULL;
+	handlers[msgindex].flags = 0;
 	rtnl_unlock();
 
 	return 0;
@@ -4143,6 +4146,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct rtnl_link *handlers;
 	int err = -EOPNOTSUPP;
 	rtnl_doit_func doit;
+	unsigned int flags;
 	int kind;
 	int family;
 	int type;
@@ -4209,6 +4213,17 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return err;
 	}
 
+	flags = READ_ONCE(handlers[type].flags);
+	if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
+		refcount_inc(&rtnl_msg_handlers_ref[family]);
+		doit = READ_ONCE(handlers[type].doit);
+		rcu_read_unlock();
+		if (doit)
+			err = doit(skb, nlh, extack);
+		refcount_dec(&rtnl_msg_handlers_ref[family]);
+		return err;
+	}
+
 	rcu_read_unlock();
 
 	rtnl_lock();
-- 
cgit v1.2.3


From 68277a2c9d32fd9090247a5c08aaf1353049c0b1 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Wed, 9 Aug 2017 14:41:16 +0200
Subject: net-next: dsa: move struct dsa_device_ops to the global header file

We need to access this struct from within the flow_dissector to fix
dissection for packets coming in on DSA devices.

Signed-off-by: Muciri Gatimu <muciri@openmesh.com>
Signed-off-by: Shashidhar Lakkavalli <shashidhar.lakkavalli@openmesh.com>
Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h  | 7 +++++++
 net/dsa/dsa_priv.h | 7 -------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index a4f66dbb4b7c..65d7804c6f69 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -101,6 +101,13 @@ struct dsa_platform_data {
 
 struct packet_type;
 
+struct dsa_device_ops {
+	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
+	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
+			       struct packet_type *pt,
+			       struct net_device *orig_dev);
+};
+
 struct dsa_switch_tree {
 	struct list_head	list;
 
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 1debf9c42fc4..9c3eeb72462d 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -65,13 +65,6 @@ struct dsa_notifier_vlan_info {
 	int port;
 };
 
-struct dsa_device_ops {
-	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
-	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-			       struct packet_type *pt,
-			       struct net_device *orig_dev);
-};
-
 struct dsa_slave_priv {
 	/* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */
 	struct sk_buff *	(*xmit)(struct sk_buff *skb,
-- 
cgit v1.2.3


From 598a968011ffc4d624934995fe46d06bd450cdf4 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Wed, 9 Aug 2017 14:41:17 +0200
Subject: net-next: dsa: add flow_dissect callback to struct dsa_device_ops

When the flow dissector first sees packets coming in on a DSA devices the
802.3 header wont be located where the code expects it to be as the tag
is still present. Adding this new callback allows a DSA device to provide a
new function that the flow_dissector can use to get the correct protocol
and offset of the network header.

Signed-off-by: Muciri Gatimu <muciri@openmesh.com>
Signed-off-by: Shashidhar Lakkavalli <shashidhar.lakkavalli@openmesh.com>
Signed-off-by: John Crispin <john@phrozen.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 65d7804c6f69..7f46b521313e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -106,6 +106,8 @@ struct dsa_device_ops {
 	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
 			       struct packet_type *pt,
 			       struct net_device *orig_dev);
+	int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
+			    int *offset);
 };
 
 struct dsa_switch_tree {
-- 
cgit v1.2.3


From 077fbac405bfc6d41419ad6c1725804ad4e9887c Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Fri, 11 Aug 2017 02:11:33 +0900
Subject: net: xfrm: support setting an output mark.

On systems that use mark-based routing it may be necessary for
routing lookups to use marks in order for packets to be routed
correctly. An example of such a system is Android, which uses
socket marks to route packets via different networks.

Currently, routing lookups in tunnel mode always use a mark of
zero, making routing incorrect on such systems.

This patch adds a new output_mark element to the xfrm state and
a corresponding XFRMA_OUTPUT_MARK netlink attribute. The output
mark differs from the existing xfrm mark in two ways:

1. The xfrm mark is used to match xfrm policies and states, while
   the xfrm output mark is used to set the mark (and influence
   the routing) of the packets emitted by those states.
2. The existing mark is constrained to be a subset of the bits of
   the originating socket or transformed packet, but the output
   mark is arbitrary and depends only on the state.

The use of a separate mark provides additional flexibility. For
example:

- A packet subject to two transforms (e.g., transport mode inside
  tunnel mode) can have two different output marks applied to it,
  one for the transport mode SA and one for the tunnel mode SA.
- On a system where socket marks determine routing, the packets
  emitted by an IPsec tunnel can be routed based on a mark that
  is determined by the tunnel, not by the marks of the
  unencrypted packets.
- Support for setting the output marks can be introduced without
  breaking any existing setups that employ both mark-based
  routing and xfrm tunnel mode. Simply changing the code to use
  the xfrm mark for routing output packets could xfrm mark could
  change behaviour in a way that breaks these setups.

If the output mark is unspecified or set to zero, the mark is not
set or changed.

Tested: make allyesconfig; make -j64
Tested: https://android-review.googlesource.com/452776
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h        |  9 ++++++---
 include/uapi/linux/xfrm.h |  1 +
 net/ipv4/xfrm4_policy.c   | 14 +++++++++-----
 net/ipv6/xfrm6_policy.c   |  9 ++++++---
 net/xfrm/xfrm_device.c    |  3 ++-
 net/xfrm/xfrm_output.c    |  3 +++
 net/xfrm/xfrm_policy.c    | 17 +++++++++--------
 net/xfrm/xfrm_user.c      | 11 +++++++++++
 8 files changed, 47 insertions(+), 20 deletions(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 18d7de34a5c3..9c7b70cce6d6 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -165,6 +165,7 @@ struct xfrm_state {
 		int		header_len;
 		int		trailer_len;
 		u32		extra_flags;
+		u32		output_mark;
 	} props;
 
 	struct xfrm_lifetime_cfg lft;
@@ -298,10 +299,12 @@ struct xfrm_policy_afinfo {
 	struct dst_entry	*(*dst_lookup)(struct net *net,
 					       int tos, int oif,
 					       const xfrm_address_t *saddr,
-					       const xfrm_address_t *daddr);
+					       const xfrm_address_t *daddr,
+					       u32 mark);
 	int			(*get_saddr)(struct net *net, int oif,
 					     xfrm_address_t *saddr,
-					     xfrm_address_t *daddr);
+					     xfrm_address_t *daddr,
+					     u32 mark);
 	void			(*decode_session)(struct sk_buff *skb,
 						  struct flowi *fl,
 						  int reverse);
@@ -1640,7 +1643,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
 				    const xfrm_address_t *saddr,
 				    const xfrm_address_t *daddr,
-				    int family);
+				    int family, u32 mark);
 
 struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp);
 
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 2b384ff09fa0..5fe7370a2bef 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -304,6 +304,7 @@ enum xfrm_attr_type_t {
 	XFRMA_ADDRESS_FILTER,	/* struct xfrm_address_filter */
 	XFRMA_PAD,
 	XFRMA_OFFLOAD_DEV,	/* struct xfrm_state_offload */
+	XFRMA_OUTPUT_MARK,	/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 4aefb149fe0a..d7bf0b041885 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -20,7 +20,8 @@
 static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
 					    int tos, int oif,
 					    const xfrm_address_t *saddr,
-					    const xfrm_address_t *daddr)
+					    const xfrm_address_t *daddr,
+					    u32 mark)
 {
 	struct rtable *rt;
 
@@ -28,6 +29,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
 	fl4->daddr = daddr->a4;
 	fl4->flowi4_tos = tos;
 	fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
+	fl4->flowi4_mark = mark;
 	if (saddr)
 		fl4->saddr = saddr->a4;
 
@@ -42,20 +44,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
 
 static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
 					  const xfrm_address_t *saddr,
-					  const xfrm_address_t *daddr)
+					  const xfrm_address_t *daddr,
+					  u32 mark)
 {
 	struct flowi4 fl4;
 
-	return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
+	return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark);
 }
 
 static int xfrm4_get_saddr(struct net *net, int oif,
-			   xfrm_address_t *saddr, xfrm_address_t *daddr)
+			   xfrm_address_t *saddr, xfrm_address_t *daddr,
+			   u32 mark)
 {
 	struct dst_entry *dst;
 	struct flowi4 fl4;
 
-	dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
+	dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f44b25a48478..11d1314ab6c5 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -27,7 +27,8 @@
 
 static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
 					  const xfrm_address_t *saddr,
-					  const xfrm_address_t *daddr)
+					  const xfrm_address_t *daddr,
+					  u32 mark)
 {
 	struct flowi6 fl6;
 	struct dst_entry *dst;
@@ -36,6 +37,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
 	fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+	fl6.flowi6_mark = mark;
 	memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
 	if (saddr)
 		memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
@@ -52,12 +54,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
 }
 
 static int xfrm6_get_saddr(struct net *net, int oif,
-			   xfrm_address_t *saddr, xfrm_address_t *daddr)
+			   xfrm_address_t *saddr, xfrm_address_t *daddr,
+			   u32 mark)
 {
 	struct dst_entry *dst;
 	struct net_device *dev;
 
-	dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr);
+	dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 1904127f5fb8..acf00104ef31 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -79,7 +79,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 			daddr = &x->props.saddr;
 		}
 
-		dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family);
+		dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr,
+					x->props.family, x->props.output_mark);
 		if (IS_ERR(dst))
 			return 0;
 
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 8c0b6722aaa8..31a2e6d34dba 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -66,6 +66,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 			goto error_nolock;
 		}
 
+		if (x->props.output_mark)
+			skb->mark = x->props.output_mark;
+
 		err = x->outer_mode->output(x, skb);
 		if (err) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 06c3bf7ab86b..1de52f36caf5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -122,7 +122,7 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
 struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
 				    const xfrm_address_t *saddr,
 				    const xfrm_address_t *daddr,
-				    int family)
+				    int family, u32 mark)
 {
 	const struct xfrm_policy_afinfo *afinfo;
 	struct dst_entry *dst;
@@ -131,7 +131,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
 	if (unlikely(afinfo == NULL))
 		return ERR_PTR(-EAFNOSUPPORT);
 
-	dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
+	dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);
 
 	rcu_read_unlock();
 
@@ -143,7 +143,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
 						int tos, int oif,
 						xfrm_address_t *prev_saddr,
 						xfrm_address_t *prev_daddr,
-						int family)
+						int family, u32 mark)
 {
 	struct net *net = xs_net(x);
 	xfrm_address_t *saddr = &x->props.saddr;
@@ -159,7 +159,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
 		daddr = x->coaddr;
 	}
 
-	dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
+	dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);
 
 	if (!IS_ERR(dst)) {
 		if (prev_saddr != saddr)
@@ -1340,14 +1340,14 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
 
 static int
 xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
-	       xfrm_address_t *remote, unsigned short family)
+	       xfrm_address_t *remote, unsigned short family, u32 mark)
 {
 	int err;
 	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
 	if (unlikely(afinfo == NULL))
 		return -EINVAL;
-	err = afinfo->get_saddr(net, oif, local, remote);
+	err = afinfo->get_saddr(net, oif, local, remote, mark);
 	rcu_read_unlock();
 	return err;
 }
@@ -1378,7 +1378,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
 			if (xfrm_addr_any(local, tmpl->encap_family)) {
 				error = xfrm_get_saddr(net, fl->flowi_oif,
 						       &tmp, remote,
-						       tmpl->encap_family);
+						       tmpl->encap_family, 0);
 				if (error)
 					goto fail;
 				local = &tmp;
@@ -1598,7 +1598,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			family = xfrm[i]->props.family;
 			dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
-					      &saddr, &daddr, family);
+					      &saddr, &daddr, family,
+					      xfrm[i]->props.output_mark);
 			err = PTR_ERR(dst);
 			if (IS_ERR(dst))
 				goto put_states;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ffe8d5ef09eb..cc3268d814b4 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -584,6 +584,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_mark_get(attrs, &x->mark);
 
+	if (attrs[XFRMA_OUTPUT_MARK])
+		x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
+
 	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
 	if (err)
 		goto error;
@@ -899,6 +902,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 		goto out;
 	if (x->security)
 		ret = copy_sec_ctx(x->security, skb);
+	if (x->props.output_mark) {
+		ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
+		if (ret)
+			goto out;
+	}
 out:
 	return ret;
 }
@@ -2454,6 +2462,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_PROTO]		= { .type = NLA_U8 },
 	[XFRMA_ADDRESS_FILTER]	= { .len = sizeof(struct xfrm_address_filter) },
 	[XFRMA_OFFLOAD_DEV]	= { .len = sizeof(struct xfrm_user_offload) },
+	[XFRMA_OUTPUT_MARK]	= { .len = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2673,6 +2682,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(x->props.extra_flags));
 	if (x->xso.dev)
 		 l += nla_total_size(sizeof(x->xso));
+	if (x->props.output_mark)
+		l += nla_total_size(sizeof(x->props.output_mark));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size_64bit(sizeof(u64));
-- 
cgit v1.2.3


From afa6c45429f6e5ddd1eb6b77a36358f9c4b789da Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:45 +0800
Subject: sctp: remove the unused typedef sctp_packet_phandler_t

Remove this function typedef, there is even no places
using it.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index fbe6e81b889b..73e9509c057e 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -657,8 +657,6 @@ struct sctp_sockaddr_entry {
 
 #define SCTP_ADDRESS_TICK_DELAY	500
 
-typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *);
-
 /* This structure holds lists of chunks as we are assembling for
  * transmission.
  */
-- 
cgit v1.2.3


From edf903f83ebca988e04a39f515ab6eacb92055df Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:46 +0800
Subject: sctp: remove the typedef sctp_sender_hb_info_t

This patch is to remove the typedef sctp_sender_hb_info_t, and
replace with struct sctp_sender_hb_info in the places where it's
using this typedef.

It is also to use sizeof(variable) instead of sizeof(type).

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  4 ++--
 net/sctp/sm_make_chunk.c   |  6 +++---
 net/sctp/sm_sideeffect.c   |  4 ++--
 net/sctp/sm_statefuns.c    | 12 +++++-------
 4 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 73e9509c057e..60033a263193 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -371,12 +371,12 @@ union sctp_params {
  *    chunk is sent and the destination transport address to which this
  *    HEARTBEAT is sent (see Section 8.3).
  */
-typedef struct sctp_sender_hb_info {
+struct sctp_sender_hb_info {
 	struct sctp_paramhdr param_hdr;
 	union sctp_addr daddr;
 	unsigned long sent_at;
 	__u64 hb_nonce;
-} sctp_sender_hb_info_t;
+};
 
 int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 		     gfp_t gfp);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 3a8fb1dffbc1..51de638a88b2 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1142,10 +1142,10 @@ nodata:
 
 /* Make a HEARTBEAT chunk.  */
 struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
-				  const struct sctp_transport *transport)
+				       const struct sctp_transport *transport)
 {
+	struct sctp_sender_hb_info hbinfo;
 	struct sctp_chunk *retval;
-	sctp_sender_hb_info_t hbinfo;
 
 	retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0,
 				   sizeof(hbinfo), GFP_ATOMIC);
@@ -1154,7 +1154,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
 		goto nodata;
 
 	hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO;
-	hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t));
+	hbinfo.param_hdr.length = htons(sizeof(hbinfo));
 	hbinfo.daddr = transport->ipaddr;
 	hbinfo.sent_at = jiffies;
 	hbinfo.hb_nonce = transport->hb_nonce;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 4a12d29d9aa1..5e8e41879b03 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -714,7 +714,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
 				  struct sctp_transport *t,
 				  struct sctp_chunk *chunk)
 {
-	sctp_sender_hb_info_t *hbinfo;
+	struct sctp_sender_hb_info *hbinfo;
 	int was_unconfirmed = 0;
 
 	/* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
@@ -768,7 +768,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
 	if (t->rto_pending == 0)
 		t->rto_pending = 1;
 
-	hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+	hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data;
 	sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at));
 
 	/* Update the heartbeat timer.  */
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index ac6aaa046529..af93419209df 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1155,27 +1155,25 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
+	struct sctp_sender_hb_info *hbinfo;
 	struct sctp_chunk *chunk = arg;
-	union sctp_addr from_addr;
 	struct sctp_transport *link;
-	sctp_sender_hb_info_t *hbinfo;
 	unsigned long max_interval;
+	union sctp_addr from_addr;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the HEARTBEAT-ACK chunk has a valid length.  */
 	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) +
-					    sizeof(sctp_sender_hb_info_t)))
+					    sizeof(*hbinfo)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+	hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data;
 	/* Make sure that the length of the parameter is what we expect */
-	if (ntohs(hbinfo->param_hdr.length) !=
-				    sizeof(sctp_sender_hb_info_t)) {
+	if (ntohs(hbinfo->param_hdr.length) != sizeof(*hbinfo))
 		return SCTP_DISPOSITION_DISCARD;
-	}
 
 	from_addr = hbinfo->daddr;
 	link = sctp_assoc_lookup_paddr(asoc, &from_addr);
-- 
cgit v1.2.3


From 74439f344b1becd57ec822bc0e2c1a4cbf240a53 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:47 +0800
Subject: sctp: remove the typedef sctp_endpoint_type_t

This patch is to remove the typedef sctp_endpoint_type_t, and
replace with enum sctp_endpoint_type in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 60033a263193..937187f3bffc 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1142,10 +1142,10 @@ int sctp_is_ep_boundall(struct sock *sk);
 
 
 /* What type of endpoint?  */
-typedef enum {
+enum sctp_endpoint_type {
 	SCTP_EP_TYPE_SOCKET,
 	SCTP_EP_TYPE_ASSOCIATION,
-} sctp_endpoint_type_t;
+};
 
 /*
  * A common base class to bridge the implmentation view of a
@@ -1169,7 +1169,7 @@ struct sctp_ep_common {
 	int hashent;
 
 	/* Runtime type information.  What kind of endpoint is this? */
-	sctp_endpoint_type_t type;
+	enum sctp_endpoint_type type;
 
 	/* Some fields to help us manage this object.
 	 *   refcnt   - Reference count access to this object.
-- 
cgit v1.2.3


From a05437ac5deb100f94e290ad4c5eef3e78f4b6bb Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:48 +0800
Subject: sctp: remove the typedef sctp_cmsgs_t

This patch is to remove the typedef sctp_cmsgs_t, and
replace with struct sctp_cmsgs in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 4 ++--
 net/sctp/socket.c          | 9 +++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 937187f3bffc..e171d3a3d2b4 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1985,11 +1985,11 @@ int sctp_cmp_addr_exact(const union sctp_addr *ss1,
 struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc);
 
 /* A convenience structure to parse out SCTP specific CMSGs. */
-typedef struct sctp_cmsgs {
+struct sctp_cmsgs {
 	struct sctp_initmsg *init;
 	struct sctp_sndrcvinfo *srinfo;
 	struct sctp_sndinfo *sinfo;
-} sctp_cmsgs_t;
+};
 
 /* Structure for tracking memory objects */
 typedef struct {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a1e2113806dd..018190655d63 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1593,7 +1593,8 @@ static int sctp_error(struct sock *sk, int flags, int err)
  */
 /* BUG:  We do not implement the equivalent of sk_stream_wait_memory(). */
 
-static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
+static int sctp_msghdr_parse(const struct msghdr *msg,
+			     struct sctp_cmsgs *cmsgs);
 
 static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
@@ -1609,7 +1610,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	struct sctp_sndrcvinfo *sinfo;
 	struct sctp_initmsg *sinit;
 	sctp_assoc_t associd = 0;
-	sctp_cmsgs_t cmsgs = { NULL };
+	struct sctp_cmsgs cmsgs = { NULL };
 	enum sctp_scope scope;
 	bool fill_sinfo_ttl = false, wait_connect = false;
 	struct sctp_datamsg *datamsg;
@@ -7445,10 +7446,10 @@ static int sctp_autobind(struct sock *sk)
  * msg_control
  * points here
  */
-static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
+static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
 {
-	struct cmsghdr *cmsg;
 	struct msghdr *my_msg = (struct msghdr *)msg;
+	struct cmsghdr *cmsg;
 
 	for_each_cmsghdr(cmsg, my_msg) {
 		if (!CMSG_OK(my_msg, cmsg))
-- 
cgit v1.2.3


From d38ef5ae35b0960f3219f1cf0203e19819e757c7 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:49 +0800
Subject: sctp: remove the typedef sctp_dbg_objcnt_entry_t

This patch is to remove the typedef sctp_dbg_objcnt_entry_t, and
replace with struct sctp_dbg_objcnt_entry in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 4 ++--
 net/sctp/objcnt.c          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e171d3a3d2b4..b6d75b37f84d 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1992,9 +1992,9 @@ struct sctp_cmsgs {
 };
 
 /* Structure for tracking memory objects */
-typedef struct {
+struct sctp_dbg_objcnt_entry {
 	char *label;
 	atomic_t *counter;
-} sctp_dbg_objcnt_entry_t;
+};
 
 #endif /* __sctp_structs_h__ */
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index 105ac3327b28..aeea6da81441 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -57,7 +57,7 @@ SCTP_DBG_OBJCNT(keys);
 /* An array to make it easy to pretty print the debug information
  * to the proc fs.
  */
-static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = {
+static struct sctp_dbg_objcnt_entry sctp_dbg_objcnt[] = {
 	SCTP_DBG_OBJCNT_ENTRY(sock),
 	SCTP_DBG_OBJCNT_ENTRY(ep),
 	SCTP_DBG_OBJCNT_ENTRY(assoc),
-- 
cgit v1.2.3


From b7ef2618a0bf75c1e480b05739b0c5f2a42081cd Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:50 +0800
Subject: sctp: remove the typedef sctp_socket_type_t

This patch is to remove the typedef sctp_socket_type_t, and
replace with enum sctp_socket_type in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h    | 3 ++-
 include/net/sctp/structs.h | 6 +++---
 net/sctp/socket.c          | 7 ++++---
 3 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 24ff7931d38c..06b4f515e157 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -550,7 +550,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport)
 
 /* Is a socket of this style? */
 #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style))
-static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style)
+static inline int __sctp_style(const struct sock *sk,
+			       enum sctp_socket_type style)
 {
 	return sctp_sk(sk)->type == style;
 }
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index b6d75b37f84d..0477945de1a3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -150,18 +150,18 @@ extern struct sctp_globals {
 #define sctp_checksum_disable		(sctp_globals.checksum_disable)
 
 /* SCTP Socket type: UDP or TCP style. */
-typedef enum {
+enum sctp_socket_type {
 	SCTP_SOCKET_UDP = 0,
 	SCTP_SOCKET_UDP_HIGH_BANDWIDTH,
 	SCTP_SOCKET_TCP
-} sctp_socket_type_t;
+};
 
 /* Per socket SCTP information. */
 struct sctp_sock {
 	/* inet_sock has to be the first member of sctp_sock */
 	struct inet_sock inet;
 	/* What kind of a socket is this? */
-	sctp_socket_type_t type;
+	enum sctp_socket_type type;
 
 	/* PF_ family specific functions.  */
 	struct sctp_pf *pf;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 018190655d63..c01af72cc603 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -100,8 +100,9 @@ static int sctp_send_asconf(struct sctp_association *asoc,
 			    struct sctp_chunk *chunk);
 static int sctp_do_bind(struct sock *, union sctp_addr *, int);
 static int sctp_autobind(struct sock *sk);
-static void sctp_sock_migrate(struct sock *, struct sock *,
-			      struct sctp_association *, sctp_socket_type_t);
+static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+			      struct sctp_association *assoc,
+			      enum sctp_socket_type type);
 
 static unsigned long sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
@@ -8086,7 +8087,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to,
  */
 static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 			      struct sctp_association *assoc,
-			      sctp_socket_type_t type)
+			      enum sctp_socket_type type)
 {
 	struct sctp_sock *oldsp = sctp_sk(oldsk);
 	struct sctp_sock *newsp = sctp_sk(newsk);
-- 
cgit v1.2.3


From e2c3108ab25b4dbab3821e8b6084bfb73afb655c Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:51 +0800
Subject: sctp: remove the typedef sctp_cmd_t

This patch is to remove the typedef sctp_cmd_t, and
replace with enum sctp_cmd in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h | 14 +++++++-------
 net/sctp/sm_sideeffect.c   |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index 376cb78b6247..4e9e589b8f18 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -196,15 +196,15 @@ static inline sctp_arg_t SCTP_NULL(void)
 	return retval;
 }
 
-typedef struct {
+struct sctp_cmd {
 	sctp_arg_t obj;
 	sctp_verb_t verb;
-} sctp_cmd_t;
+};
 
 typedef struct {
-	sctp_cmd_t cmds[SCTP_MAX_NUM_COMMANDS];
-	sctp_cmd_t *last_used_slot;
-	sctp_cmd_t *next_cmd;
+	struct sctp_cmd cmds[SCTP_MAX_NUM_COMMANDS];
+	struct sctp_cmd *last_used_slot;
+	struct sctp_cmd *next_cmd;
 } sctp_cmd_seq_t;
 
 
@@ -228,7 +228,7 @@ static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq)
 static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb,
 				   sctp_arg_t obj)
 {
-	sctp_cmd_t *cmd = seq->last_used_slot - 1;
+	struct sctp_cmd *cmd = seq->last_used_slot - 1;
 
 	BUG_ON(cmd < seq->cmds);
 
@@ -240,7 +240,7 @@ static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb,
 /* Return the next command structure in an sctp_cmd_seq.
  * Return NULL at the end of the sequence.
  */
-static inline sctp_cmd_t *sctp_next_cmd(sctp_cmd_seq_t *seq)
+static inline struct sctp_cmd *sctp_next_cmd(sctp_cmd_seq_t *seq)
 {
 	if (seq->next_cmd <= seq->last_used_slot)
 		return NULL;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 5e8e41879b03..0cb3d5a723af 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1277,7 +1277,7 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
 	struct sctp_sock *sp = sctp_sk(sk);
 	int error = 0;
 	int force;
-	sctp_cmd_t *cmd;
+	struct sctp_cmd *cmd;
 	struct sctp_chunk *new_obj;
 	struct sctp_chunk *chunk = NULL;
 	struct sctp_packet *packet;
-- 
cgit v1.2.3


From a85bbeb221d860097859f110ba1321f2b0653f07 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:52 +0800
Subject: sctp: remove the typedef sctp_cmd_seq_t

This patch is to remove the typedef sctp_cmd_seq_t, and
replace with struct sctp_cmd_seq in the places where it's
using this typedef.

Note that it doesn't fix many indents although it should,
as sctp_disposition_t's removal would mess them up again.
So better to fix them when removing sctp_disposition_t in
the later patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h |  14 +--
 include/net/sctp/sm.h      |   2 +-
 net/sctp/probe.c           |   2 +-
 net/sctp/sm_sideeffect.c   |  54 ++++++------
 net/sctp/sm_statefuns.c    | 216 +++++++++++++++++++++++----------------------
 5 files changed, 145 insertions(+), 143 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index 4e9e589b8f18..cbf6798866ef 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -110,7 +110,7 @@ typedef enum {
 	SCTP_CMD_LAST
 } sctp_verb_t;
 
-/* How many commands can you put in an sctp_cmd_seq_t?
+/* How many commands can you put in an struct sctp_cmd_seq?
  * This is a rather arbitrary number, ideally derived from a careful
  * analysis of the state functions, but in reality just taken from
  * thin air in the hopes othat we don't trigger a kernel panic.
@@ -201,17 +201,17 @@ struct sctp_cmd {
 	sctp_verb_t verb;
 };
 
-typedef struct {
+struct sctp_cmd_seq {
 	struct sctp_cmd cmds[SCTP_MAX_NUM_COMMANDS];
 	struct sctp_cmd *last_used_slot;
 	struct sctp_cmd *next_cmd;
-} sctp_cmd_seq_t;
+};
 
 
 /* Initialize a block of memory as a command sequence.
  * Return 0 if the initialization fails.
  */
-static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq)
+static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq)
 {
 	/* cmds[] is filled backwards to simplify the overflow BUG() check */
 	seq->last_used_slot = seq->cmds + SCTP_MAX_NUM_COMMANDS;
@@ -220,12 +220,12 @@ static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq)
 }
 
 
-/* Add a command to an sctp_cmd_seq_t.
+/* Add a command to an struct sctp_cmd_seq.
  *
  * Use the SCTP_* constructors defined by SCTP_ARG_CONSTRUCTOR() above
  * to wrap data which goes in the obj argument.
  */
-static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb,
+static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb,
 				   sctp_arg_t obj)
 {
 	struct sctp_cmd *cmd = seq->last_used_slot - 1;
@@ -240,7 +240,7 @@ static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb,
 /* Return the next command structure in an sctp_cmd_seq.
  * Return NULL at the end of the sequence.
  */
-static inline struct sctp_cmd *sctp_next_cmd(sctp_cmd_seq_t *seq)
+static inline struct sctp_cmd *sctp_next_cmd(struct sctp_cmd_seq *seq)
 {
 	if (seq->next_cmd <= seq->last_used_slot)
 		return NULL;
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 1e7651c3b158..9af64b98d96b 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -75,7 +75,7 @@ typedef sctp_disposition_t (sctp_state_fn_t) (struct net *,
 					      const struct sctp_association *,
 					      const union sctp_subtype type,
 					      void *arg,
-					      sctp_cmd_seq_t *);
+					      struct sctp_cmd_seq *);
 typedef void (sctp_timer_event_t) (unsigned long);
 typedef struct {
 	sctp_state_fn_t *fn;
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 43837dfc86a7..34097a167431 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -132,7 +132,7 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
 					    const struct sctp_association *asoc,
 					    const union sctp_subtype type,
 					    void *arg,
-					    sctp_cmd_seq_t *commands)
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 0cb3d5a723af..6dd5934cbda6 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -58,7 +58,7 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
 				struct sctp_association *asoc,
 				void *event_arg,
 				sctp_disposition_t status,
-				sctp_cmd_seq_t *commands,
+				struct sctp_cmd_seq *commands,
 				gfp_t gfp);
 static int sctp_side_effects(enum sctp_event event_type,
 			     union sctp_subtype subtype,
@@ -67,7 +67,7 @@ static int sctp_side_effects(enum sctp_event event_type,
 			     struct sctp_association **asoc,
 			     void *event_arg,
 			     sctp_disposition_t status,
-			     sctp_cmd_seq_t *commands,
+			     struct sctp_cmd_seq *commands,
 			     gfp_t gfp);
 
 /********************************************************************
@@ -150,7 +150,7 @@ static void sctp_do_ecn_cwr_work(struct sctp_association *asoc,
 
 /* Generate SACK if necessary.  We call this at the end of a packet.  */
 static int sctp_gen_sack(struct sctp_association *asoc, int force,
-			 sctp_cmd_seq_t *commands)
+			 struct sctp_cmd_seq *commands)
 {
 	__u32 ctsn, max_tsn_seen;
 	struct sctp_chunk *sack;
@@ -506,7 +506,7 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
  * notification SHOULD be sent to the upper layer.
  *
  */
-static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
+static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands,
 					 struct sctp_association *asoc,
 					 struct sctp_transport *transport,
 					 int is_hb)
@@ -578,7 +578,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
 }
 
 /* Worker routine to handle INIT command failure.  */
-static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
+static void sctp_cmd_init_failed(struct sctp_cmd_seq *commands,
 				 struct sctp_association *asoc,
 				 unsigned int error)
 {
@@ -601,7 +601,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
 }
 
 /* Worker routine to handle SCTP_CMD_ASSOC_FAILED.  */
-static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
+static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
 				  struct sctp_association *asoc,
 				  enum sctp_event event_type,
 				  union sctp_subtype subtype,
@@ -645,7 +645,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
  * since all other cases use "temporary" associations and can do all
  * their work in statefuns directly.
  */
-static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
+static int sctp_cmd_process_init(struct sctp_cmd_seq *commands,
 				 struct sctp_association *asoc,
 				 struct sctp_chunk *chunk,
 				 struct sctp_init_chunk *peer_init,
@@ -667,7 +667,7 @@ static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
 }
 
 /* Helper function to break out starting up of heartbeat timers.  */
-static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_hb_timers_start(struct sctp_cmd_seq *cmds,
 				     struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
@@ -680,7 +680,7 @@ static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
 		sctp_transport_reset_hb_timer(t);
 }
 
-static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_hb_timers_stop(struct sctp_cmd_seq *cmds,
 				    struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
@@ -695,7 +695,7 @@ static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
 }
 
 /* Helper function to stop any pending T3-RTX timers */
-static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_t3_rtx_timers_stop(struct sctp_cmd_seq *cmds,
 					struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
@@ -709,7 +709,7 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
 
 
 /* Helper function to handle the reception of an HEARTBEAT ACK.  */
-static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_transport_on(struct sctp_cmd_seq *cmds,
 				  struct sctp_association *asoc,
 				  struct sctp_transport *t,
 				  struct sctp_chunk *chunk)
@@ -780,7 +780,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
 
 
 /* Helper function to process the process SACK command.  */
-static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
+static int sctp_cmd_process_sack(struct sctp_cmd_seq *cmds,
 				 struct sctp_association *asoc,
 				 struct sctp_chunk *chunk)
 {
@@ -802,7 +802,7 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
 /* Helper function to set the timeout value for T2-SHUTDOWN timer and to set
  * the transport for a shutdown chunk.
  */
-static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_setup_t2(struct sctp_cmd_seq *cmds,
 			      struct sctp_association *asoc,
 			      struct sctp_chunk *chunk)
 {
@@ -819,7 +819,7 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
 }
 
-static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds,
 				  struct sctp_association *asoc,
 				  struct sctp_association *new)
 {
@@ -842,7 +842,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
 }
 
 /* Helper function to change the state of an association. */
-static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
 			       struct sctp_association *asoc,
 			       enum sctp_state state)
 {
@@ -902,7 +902,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
 }
 
 /* Helper function to delete an association. */
-static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_delete_tcb(struct sctp_cmd_seq *cmds,
 				struct sctp_association *asoc)
 {
 	struct sock *sk = asoc->base.sk;
@@ -924,9 +924,9 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
  * destination address (we use active path instead of primary path just
  * because primary path may be inactive.
  */
-static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
-				struct sctp_association *asoc,
-				struct sctp_chunk *chunk)
+static void sctp_cmd_setup_t4(struct sctp_cmd_seq *cmds,
+			      struct sctp_association *asoc,
+			      struct sctp_chunk *chunk)
 {
 	struct sctp_transport *t;
 
@@ -936,7 +936,7 @@ static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
 }
 
 /* Process an incoming Operation Error Chunk. */
-static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
 				   struct sctp_association *asoc,
 				   struct sctp_chunk *chunk)
 {
@@ -1025,9 +1025,9 @@ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error)
 }
 
 /* Helper function to generate an association change event */
-static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
-				 struct sctp_association *asoc,
-				 u8 state)
+static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
+				  struct sctp_association *asoc,
+				  u8 state)
 {
 	struct sctp_ulpevent *ev;
 
@@ -1040,7 +1040,7 @@ static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
 }
 
 /* Helper function to generate an adaptation indication event */
-static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
+static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
 				    struct sctp_association *asoc)
 {
 	struct sctp_ulpevent *ev;
@@ -1145,7 +1145,7 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type,
 	       struct sctp_endpoint *ep, struct sctp_association *asoc,
 	       void *event_arg, gfp_t gfp)
 {
-	sctp_cmd_seq_t commands;
+	struct sctp_cmd_seq commands;
 	const sctp_sm_table_entry_t *state_fn;
 	sctp_disposition_t status;
 	int error = 0;
@@ -1184,7 +1184,7 @@ static int sctp_side_effects(enum sctp_event event_type,
 			     struct sctp_association **asoc,
 			     void *event_arg,
 			     sctp_disposition_t status,
-			     sctp_cmd_seq_t *commands,
+			     struct sctp_cmd_seq *commands,
 			     gfp_t gfp)
 {
 	int error;
@@ -1270,7 +1270,7 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
 				struct sctp_association *asoc,
 				void *event_arg,
 				sctp_disposition_t status,
-				sctp_cmd_seq_t *commands,
+				struct sctp_cmd_seq *commands,
 				gfp_t gfp)
 {
 	struct sock *sk = ep->base.sk;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index af93419209df..93b6f42a9252 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -67,7 +67,7 @@ static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
 				  size_t paylen);
 static int sctp_eat_data(const struct sctp_association *asoc,
 			 struct sctp_chunk *chunk,
-			 sctp_cmd_seq_t *commands);
+			 struct sctp_cmd_seq *commands);
 static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
 					     const struct sctp_association *asoc,
 					     const struct sctp_chunk *chunk);
@@ -75,30 +75,30 @@ static void sctp_send_stale_cookie_err(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const struct sctp_chunk *chunk,
-				       sctp_cmd_seq_t *commands,
+				       struct sctp_cmd_seq *commands,
 				       struct sctp_chunk *err_chunk);
 static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 						 const struct sctp_endpoint *ep,
 						 const struct sctp_association *asoc,
 						 const union sctp_subtype type,
 						 void *arg,
-						 sctp_cmd_seq_t *commands);
+						 struct sctp_cmd_seq *commands);
 static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 					     const struct sctp_endpoint *ep,
 					     const struct sctp_association *asoc,
 					     const union sctp_subtype type,
 					     void *arg,
-					     sctp_cmd_seq_t *commands);
+					     struct sctp_cmd_seq *commands);
 static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands);
+					struct sctp_cmd_seq *commands);
 static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
 
 static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
-					   sctp_cmd_seq_t *commands,
+					   struct sctp_cmd_seq *commands,
 					   __be16 error, int sk_err,
 					   const struct sctp_association *asoc,
 					   struct sctp_transport *transport);
@@ -108,7 +108,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     void *arg,
-				     sctp_cmd_seq_t *commands,
+				     struct sctp_cmd_seq *commands,
 				     const __u8 *payload,
 				     const size_t paylen);
 
@@ -118,7 +118,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands);
+				     struct sctp_cmd_seq *commands);
 
 static sctp_disposition_t sctp_sf_violation_paramlen(
 				     struct net *net,
@@ -126,7 +126,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg, void *ext,
-				     sctp_cmd_seq_t *commands);
+				     struct sctp_cmd_seq *commands);
 
 static sctp_disposition_t sctp_sf_violation_ctsn(
 				     struct net *net,
@@ -134,7 +134,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands);
+				     struct sctp_cmd_seq *commands);
 
 static sctp_disposition_t sctp_sf_violation_chunk(
 				     struct net *net,
@@ -142,7 +142,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands);
+				     struct sctp_cmd_seq *commands);
 
 static enum sctp_ierror sctp_sf_authenticate(
 				struct net *net,
@@ -156,7 +156,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands);
+					struct sctp_cmd_seq *commands);
 
 /* Small helper function that checks if the chunk length
  * is of the appropriate length.  The 'required_length' argument
@@ -219,7 +219,7 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
 				  const struct sctp_association *asoc,
 				  const union sctp_subtype type,
 				  void *arg,
-				  sctp_cmd_seq_t *commands)
+				  struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
@@ -305,7 +305,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg, *repl, *err_chunk;
 	struct sctp_unrecognized_param *unk_param;
@@ -499,7 +499,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 				       const struct sctp_association *asoc,
 				       const union sctp_subtype type,
 				       void *arg,
-				       sctp_cmd_seq_t *commands)
+				       struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_init_chunk *initchunk;
@@ -648,7 +648,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
 				      const union sctp_subtype type, void *arg,
-				      sctp_cmd_seq_t *commands)
+				      struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_association *new_asoc;
@@ -875,7 +875,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
 				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
 				      const union sctp_subtype type, void *arg,
-				      sctp_cmd_seq_t *commands)
+				      struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
@@ -953,7 +953,7 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
 					    const struct sctp_association *asoc,
 					    const union sctp_subtype type,
 					    void *arg,
-					    sctp_cmd_seq_t *commands)
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = (struct sctp_transport *) arg;
 	struct sctp_chunk *reply;
@@ -979,7 +979,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = (struct sctp_transport *) arg;
 
@@ -1026,7 +1026,7 @@ sctp_disposition_t sctp_sf_send_reconf(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const union sctp_subtype type, void *arg,
-				       sctp_cmd_seq_t *commands)
+				       struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = arg;
 
@@ -1078,7 +1078,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 				    const struct sctp_association *asoc,
 				    const union sctp_subtype type,
 				    void *arg,
-				    sctp_cmd_seq_t *commands)
+				    struct sctp_cmd_seq *commands)
 {
 	struct sctp_paramhdr *param_hdr;
 	struct sctp_chunk *chunk = arg;
@@ -1153,7 +1153,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_sender_hb_info *hbinfo;
 	struct sctp_chunk *chunk = arg;
@@ -1225,7 +1225,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
  */
 static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 				      struct sctp_chunk *init,
-				      sctp_cmd_seq_t *commands)
+				      struct sctp_cmd_seq *commands)
 {
 	int len;
 	struct sctp_packet *pkt;
@@ -1290,7 +1290,7 @@ static bool list_has_sctp_addr(const struct list_head *list,
 static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
 				       const struct sctp_association *asoc,
 				       struct sctp_chunk *init,
-				       sctp_cmd_seq_t *commands)
+				       struct sctp_cmd_seq *commands)
 {
 	struct net *net = sock_net(new_asoc->base.sk);
 	struct sctp_transport *new_addr;
@@ -1415,7 +1415,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
-	void *arg, sctp_cmd_seq_t *commands)
+	void *arg, struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg, *repl, *err_chunk;
 	struct sctp_unrecognized_param *unk_param;
@@ -1627,7 +1627,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
 				    const struct sctp_association *asoc,
 				    const union sctp_subtype type,
 				    void *arg,
-				    sctp_cmd_seq_t *commands)
+				    struct sctp_cmd_seq *commands)
 {
 	/* Call helper to do the real work for both simulataneous and
 	 * duplicate INIT chunk handling.
@@ -1681,7 +1681,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	/* Call helper to do the real work for both simulataneous and
 	 * duplicate INIT chunk handling.
@@ -1703,7 +1703,8 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
 					    const struct sctp_endpoint *ep,
 					    const struct sctp_association *asoc,
 					    const union sctp_subtype type,
-					    void *arg, sctp_cmd_seq_t *commands)
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
 	/* Per the above section, we'll discard the chunk if we have an
 	 * endpoint.  If this is an OOTB INIT-ACK, treat it as such.
@@ -1723,7 +1724,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
-					sctp_cmd_seq_t *commands,
+					struct sctp_cmd_seq *commands,
 					struct sctp_association *new_asoc)
 {
 	struct sctp_init_chunk *peer_init;
@@ -1838,7 +1839,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
-					sctp_cmd_seq_t *commands,
+					struct sctp_cmd_seq *commands,
 					struct sctp_association *new_asoc)
 {
 	struct sctp_init_chunk *peer_init;
@@ -1909,7 +1910,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
-					sctp_cmd_seq_t *commands,
+					struct sctp_cmd_seq *commands,
 					struct sctp_association *new_asoc)
 {
 	/* The cookie should be silently discarded.
@@ -1931,7 +1932,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
-					sctp_cmd_seq_t *commands,
+					struct sctp_cmd_seq *commands,
 					struct sctp_association *new_asoc)
 {
 	struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
@@ -2027,7 +2028,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	sctp_disposition_t retval;
 	struct sctp_chunk *chunk = arg;
@@ -2146,7 +2147,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -2188,7 +2189,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -2239,7 +2240,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	/* The same T2 timer, so we should be able to use
 	 * common function with the SHUTDOWN-SENT state.
@@ -2266,7 +2267,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_errhdr *err;
@@ -2330,7 +2331,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 						 const struct sctp_association *asoc,
 						 const union sctp_subtype type,
 						 void *arg,
-						 sctp_cmd_seq_t *commands)
+						 struct sctp_cmd_seq *commands)
 {
 	int attempts = asoc->init_err_counter + 1;
 	struct sctp_chunk *chunk = arg, *reply;
@@ -2452,7 +2453,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -2489,7 +2490,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	unsigned int len;
@@ -2527,7 +2528,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	unsigned int len;
@@ -2566,7 +2567,7 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR,
 				      ENOPROTOOPT, asoc,
@@ -2581,7 +2582,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
 					       const struct sctp_association *asoc,
 					       const union sctp_subtype type,
 					       void *arg,
-					       sctp_cmd_seq_t *commands)
+					       struct sctp_cmd_seq *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
@@ -2595,7 +2596,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
  * This is common code called by several sctp_sf_*_abort() functions above.
  */
 static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
-					   sctp_cmd_seq_t *commands,
+					   struct sctp_cmd_seq *commands,
 					   __be16 error, int sk_err,
 					   const struct sctp_association *asoc,
 					   struct sctp_transport *transport)
@@ -2653,7 +2654,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
 					   const struct sctp_association *asoc,
 					   const union sctp_subtype type,
 					   void *arg,
-					   sctp_cmd_seq_t *commands)
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	sctp_disposition_t disposition;
@@ -2742,7 +2743,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
 					   const struct sctp_association *asoc,
 					   const union sctp_subtype type,
 					   void *arg,
-					   sctp_cmd_seq_t *commands)
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_shutdownhdr *sdh;
@@ -2795,7 +2796,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
 				    const struct sctp_association *asoc,
 				    const union sctp_subtype type,
 				    void *arg,
-				    sctp_cmd_seq_t *commands)
+				    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
 	struct sctp_chunk *reply;
@@ -2859,7 +2860,7 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
 				      const struct sctp_association *asoc,
 				      const union sctp_subtype type,
 				      void *arg,
-				      sctp_cmd_seq_t *commands)
+				      struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_cwrhdr *cwr;
@@ -2915,7 +2916,7 @@ sctp_disposition_t sctp_sf_do_ecne(struct net *net,
 				   const struct sctp_association *asoc,
 				   const union sctp_subtype type,
 				   void *arg,
-				   sctp_cmd_seq_t *commands)
+				   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ecnehdr *ecne;
@@ -2972,7 +2973,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	sctp_arg_t force = SCTP_NOFORCE();
@@ -3092,7 +3093,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	int error;
@@ -3183,7 +3184,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_sackhdr *sackh;
@@ -3257,7 +3258,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk = arg;
@@ -3307,7 +3308,7 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_errhdr *err;
@@ -3345,7 +3346,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
@@ -3428,7 +3429,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 				const struct sctp_association *asoc,
 				const union sctp_subtype type,
 				void *arg,
-				sctp_cmd_seq_t *commands)
+				struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
@@ -3521,7 +3522,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
 					     const struct sctp_association *asoc,
 					     const union sctp_subtype type,
 					     void *arg,
-					     sctp_cmd_seq_t *commands)
+					     struct sctp_cmd_seq *commands)
 {
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk = arg;
@@ -3583,7 +3584,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
 				      const struct sctp_association *asoc,
 				      const union sctp_subtype type,
 				      void *arg,
-				      sctp_cmd_seq_t *commands)
+				      struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -3607,7 +3608,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type, void *arg,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk	*chunk = arg;
 	struct sctp_chunk	*asconf_ack = NULL;
@@ -3725,7 +3726,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 					 const struct sctp_association *asoc,
 					 const union sctp_subtype type,
 					 void *arg,
-					 sctp_cmd_seq_t *commands)
+					 struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk	*asconf_ack = arg;
 	struct sctp_chunk	*last_asconf = asoc->addip_last_asconf;
@@ -3843,7 +3844,7 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type, void *arg,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	struct sctp_paramhdr *err_param = NULL;
 	struct sctp_chunk *chunk = arg;
@@ -3920,7 +3921,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
 				       const struct sctp_association *asoc,
 				       const union sctp_subtype type,
 				       void *arg,
-				       sctp_cmd_seq_t *commands)
+				       struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_hdr *fwdtsn_hdr;
@@ -3991,7 +3992,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_hdr *fwdtsn_hdr;
@@ -4158,7 +4159,7 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net,
 				    const struct sctp_association *asoc,
 				    const union sctp_subtype type,
 				    void *arg,
-				    sctp_cmd_seq_t *commands)
+				    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_authhdr *auth_hdr;
@@ -4255,7 +4256,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *unk_chunk = arg;
 	struct sctp_chunk *err_chunk;
@@ -4335,7 +4336,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
 					 const struct sctp_association *asoc,
 					 const union sctp_subtype type,
 					 void *arg,
-					 sctp_cmd_seq_t *commands)
+					 struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -4375,7 +4376,7 @@ sctp_disposition_t sctp_sf_pdiscard(struct net *net,
 				    const struct sctp_association *asoc,
 				    const union sctp_subtype type,
 				    void *arg,
-				    sctp_cmd_seq_t *commands)
+				    struct sctp_cmd_seq *commands)
 {
 	SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
@@ -4403,7 +4404,7 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -4423,7 +4424,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     void *arg,
-				     sctp_cmd_seq_t *commands,
+				     struct sctp_cmd_seq *commands,
 				     const __u8 *payload,
 				     const size_t paylen)
 {
@@ -4541,7 +4542,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	static const char err_str[] = "The following chunk had invalid length:";
 
@@ -4561,7 +4562,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg, void *ext,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk =  arg;
 	struct sctp_paramhdr *param = ext;
@@ -4604,7 +4605,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	static const char err_str[] = "The cumulative tsn ack beyond the max tsn currently sent:";
 
@@ -4624,7 +4625,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 				     const struct sctp_association *asoc,
 				     const union sctp_subtype type,
 				     void *arg,
-				     sctp_cmd_seq_t *commands)
+				     struct sctp_cmd_seq *commands)
 {
 	static const char err_str[] = "The following chunk violates protocol:";
 
@@ -4699,7 +4700,7 @@ sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
 				       const struct sctp_association *asoc,
 				       const union sctp_subtype type,
 				       void *arg,
-				       sctp_cmd_seq_t *commands)
+				       struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *repl;
 	struct sctp_association *my_asoc;
@@ -4811,7 +4812,7 @@ sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
 				       const struct sctp_association *asoc,
 				       const union sctp_subtype type,
 				       void *arg,
-				       sctp_cmd_seq_t *commands)
+				       struct sctp_cmd_seq *commands)
 {
 	struct sctp_datamsg *msg = arg;
 
@@ -4851,7 +4852,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	int disposition;
 
@@ -4907,7 +4908,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	/* From 9.1 Abort of an Association
 	 * Upon receipt of the ABORT primitive from its upper
@@ -4944,7 +4945,7 @@ sctp_disposition_t sctp_sf_error_closed(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR, SCTP_ERROR(-EINVAL));
 	return SCTP_DISPOSITION_CONSUME;
@@ -4958,7 +4959,7 @@ sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
 					  const struct sctp_association *asoc,
 					  const union sctp_subtype type,
 					  void *arg,
-					  sctp_cmd_seq_t *commands)
+					  struct sctp_cmd_seq *commands)
 {
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR,
 			SCTP_ERROR(-ESHUTDOWN));
@@ -4985,7 +4986,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
@@ -5019,7 +5020,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
-	void *arg, sctp_cmd_seq_t *commands)
+	void *arg, struct sctp_cmd_seq *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
@@ -5047,7 +5048,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *abort = arg;
 
@@ -5096,7 +5097,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
@@ -5122,7 +5123,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	/* Stop the T5-shutdown guard timer.  */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5149,7 +5150,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	/* Stop the T2-shutdown timer.  */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5180,7 +5181,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	/* The same T2 timer, so we should be able to use
 	 * common function with the SHUTDOWN-SENT state.
@@ -5216,7 +5217,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type,
 				      (struct sctp_transport *)arg, commands))
@@ -5248,7 +5249,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -5264,7 +5265,8 @@ sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
 					 const struct sctp_endpoint *ep,
 					 const struct sctp_association *asoc,
 					 const union sctp_subtype type,
-					 void *arg, sctp_cmd_seq_t *commands)
+					 void *arg,
+					 struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -5283,7 +5285,7 @@ sctp_disposition_t sctp_sf_ignore_primitive(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	pr_debug("%s: primitive type:%d is ignored\n", __func__,
 		 type.primitive);
@@ -5307,7 +5309,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	struct sctp_ulpevent *event;
 
@@ -5339,7 +5341,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *reply;
 
@@ -5409,7 +5411,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
 	struct sctp_chunk *reply;
@@ -5482,7 +5484,7 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	pr_debug("%s: the event other type:%d is ignored\n",
 		 __func__, type.other);
@@ -5510,7 +5512,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = arg;
 
@@ -5598,7 +5600,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
 				       const struct sctp_association *asoc,
 				       const union sctp_subtype type,
 				       void *arg,
-				       sctp_cmd_seq_t *commands)
+				       struct sctp_cmd_seq *commands)
 {
 	SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
@@ -5629,7 +5631,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
 					   const struct sctp_association *asoc,
 					   const union sctp_subtype type,
 					   void *arg,
-					   sctp_cmd_seq_t *commands)
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *repl = NULL;
 	struct sctp_bind_addr *bp;
@@ -5693,7 +5695,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
 					   const struct sctp_association *asoc,
 					   const union sctp_subtype type,
 					   void *arg,
-					   sctp_cmd_seq_t *commands)
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *repl = NULL;
 	int attempts = asoc->init_err_counter + 1;
@@ -5743,7 +5745,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
 					   const struct sctp_association *asoc,
 					   const union sctp_subtype type,
 					   void *arg,
-					   sctp_cmd_seq_t *commands)
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *reply = NULL;
 
@@ -5814,7 +5816,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = asoc->addip_last_asconf;
 	struct sctp_transport *transport = chunk->transport;
@@ -5885,7 +5887,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
 					   const struct sctp_association *asoc,
 					   const union sctp_subtype type,
 					   void *arg,
-					   sctp_cmd_seq_t *commands)
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *reply = NULL;
 
@@ -5922,7 +5924,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 	const struct sctp_association *asoc,
 	const union sctp_subtype type,
 	void *arg,
-	sctp_cmd_seq_t *commands)
+	struct sctp_cmd_seq *commands)
 {
 	int disposition;
 
@@ -5964,7 +5966,7 @@ sctp_disposition_t sctp_sf_not_impl(struct net *net,
 				    const struct sctp_association *asoc,
 				    const union sctp_subtype type,
 				    void *arg,
-				    sctp_cmd_seq_t *commands)
+				    struct sctp_cmd_seq *commands)
 {
 	return SCTP_DISPOSITION_NOT_IMPL;
 }
@@ -5982,7 +5984,7 @@ sctp_disposition_t sctp_sf_bug(struct net *net,
 			       const struct sctp_association *asoc,
 			       const union sctp_subtype type,
 			       void *arg,
-			       sctp_cmd_seq_t *commands)
+			       struct sctp_cmd_seq *commands)
 {
 	return SCTP_DISPOSITION_BUG;
 }
@@ -6003,7 +6005,7 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	pr_debug("%s: timer %d ignored\n", __func__, type.chunk);
 
@@ -6169,7 +6171,7 @@ static void sctp_send_stale_cookie_err(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const struct sctp_chunk *chunk,
-				       sctp_cmd_seq_t *commands,
+				       struct sctp_cmd_seq *commands,
 				       struct sctp_chunk *err_chunk)
 {
 	struct sctp_packet *packet;
@@ -6198,7 +6200,7 @@ static void sctp_send_stale_cookie_err(struct net *net,
 /* Process a data chunk */
 static int sctp_eat_data(const struct sctp_association *asoc,
 			 struct sctp_chunk *chunk,
-			 sctp_cmd_seq_t *commands)
+			 struct sctp_cmd_seq *commands)
 {
 	struct sctp_datahdr *data_hdr;
 	struct sctp_chunk *err;
-- 
cgit v1.2.3


From c488b7704ed0eed18e11f9b685931558735f2a68 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:53 +0800
Subject: sctp: remove the typedef sctp_arg_t

This patch is to remove the typedef sctp_arg_t, and
replace with union sctp_arg in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h | 26 +++++++++++++-------------
 net/sctp/sm_statefuns.c    |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index cbf6798866ef..f5fc425b5a4f 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -117,7 +117,7 @@ typedef enum {
  */
 #define SCTP_MAX_NUM_COMMANDS 20
 
-typedef union {
+union sctp_arg {
 	void *zero_all;	/* Set to NULL to clear the entire union */
 	__s32 i32;
 	__u32 u32;
@@ -137,24 +137,24 @@ typedef union {
 	struct sctp_packet *packet;
 	struct sctp_sackhdr *sackh;
 	struct sctp_datamsg *msg;
-} sctp_arg_t;
+};
 
 /* We are simulating ML type constructors here.
  *
  * SCTP_ARG_CONSTRUCTOR(NAME, TYPE, ELT) builds a function called
  * SCTP_NAME() which takes an argument of type TYPE and returns an
- * sctp_arg_t.  It does this by inserting the sole argument into the
- * ELT union element of a local sctp_arg_t.
+ * union sctp_arg.  It does this by inserting the sole argument into
+ * the ELT union element of a local union sctp_arg.
  *
  * E.g., SCTP_ARG_CONSTRUCTOR(I32, __s32, i32) builds SCTP_I32(arg),
- * which takes an __s32 and returns a sctp_arg_t containing the
+ * which takes an __s32 and returns a union sctp_arg containing the
  * __s32.  So, after foo = SCTP_I32(arg), foo.i32 == arg.
  */
 
 #define SCTP_ARG_CONSTRUCTOR(name, type, elt) \
-static inline sctp_arg_t	\
+static inline union sctp_arg	\
 SCTP_## name (type arg)		\
-{ sctp_arg_t retval;\
+{ union sctp_arg retval;\
   retval.zero_all = NULL;\
   retval.elt = arg;\
   return retval;\
@@ -179,25 +179,25 @@ SCTP_ARG_CONSTRUCTOR(PACKET,	struct sctp_packet *, packet)
 SCTP_ARG_CONSTRUCTOR(SACKH,	struct sctp_sackhdr *, sackh)
 SCTP_ARG_CONSTRUCTOR(DATAMSG,	struct sctp_datamsg *, msg)
 
-static inline sctp_arg_t SCTP_FORCE(void)
+static inline union sctp_arg SCTP_FORCE(void)
 {
 	return SCTP_I32(1);
 }
 
-static inline sctp_arg_t SCTP_NOFORCE(void)
+static inline union sctp_arg SCTP_NOFORCE(void)
 {
 	return SCTP_I32(0);
 }
 
-static inline sctp_arg_t SCTP_NULL(void)
+static inline union sctp_arg SCTP_NULL(void)
 {
-	sctp_arg_t retval;
+	union sctp_arg retval;
 	retval.zero_all = NULL;
 	return retval;
 }
 
 struct sctp_cmd {
-	sctp_arg_t obj;
+	union sctp_arg obj;
 	sctp_verb_t verb;
 };
 
@@ -226,7 +226,7 @@ static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq)
  * to wrap data which goes in the obj argument.
  */
 static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb,
-				   sctp_arg_t obj)
+				   union sctp_arg obj)
 {
 	struct sctp_cmd *cmd = seq->last_used_slot - 1;
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 93b6f42a9252..3394c4d34ea4 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2975,8 +2975,8 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
 					void *arg,
 					struct sctp_cmd_seq *commands)
 {
+	union sctp_arg force = SCTP_NOFORCE();
 	struct sctp_chunk *chunk = arg;
-	sctp_arg_t force = SCTP_NOFORCE();
 	int error;
 
 	if (!sctp_vtag_verify(chunk, asoc)) {
-- 
cgit v1.2.3


From e08af95df1130883762b388a19bb150ae5d16c09 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:54 +0800
Subject: sctp: remove the typedef sctp_verb_t

This patch is to remove the typedef sctp_verb_t, and
replace with enum sctp_verb in the places where it's
using this typedef.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h | 10 +++++-----
 net/sctp/sm_statefuns.c    |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index f5fc425b5a4f..b55c6a48a206 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -40,7 +40,7 @@
 #include <net/sctp/structs.h>
 
 
-typedef enum {
+enum sctp_verb {
 	SCTP_CMD_NOP = 0,	/* Do nothing. */
 	SCTP_CMD_NEW_ASOC,	/* Register a new association.  */
 	SCTP_CMD_DELETE_TCB,	/* Delete the current association. */
@@ -108,7 +108,7 @@ typedef enum {
 	SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/
 	SCTP_CMD_SET_ASOC,	 /* Restore association context */
 	SCTP_CMD_LAST
-} sctp_verb_t;
+};
 
 /* How many commands can you put in an struct sctp_cmd_seq?
  * This is a rather arbitrary number, ideally derived from a careful
@@ -198,7 +198,7 @@ static inline union sctp_arg SCTP_NULL(void)
 
 struct sctp_cmd {
 	union sctp_arg obj;
-	sctp_verb_t verb;
+	enum sctp_verb verb;
 };
 
 struct sctp_cmd_seq {
@@ -225,8 +225,8 @@ static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq)
  * Use the SCTP_* constructors defined by SCTP_ARG_CONSTRUCTOR() above
  * to wrap data which goes in the obj argument.
  */
-static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb,
-				   union sctp_arg obj)
+static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq,
+				   enum sctp_verb verb, union sctp_arg obj)
 {
 	struct sctp_cmd *cmd = seq->last_used_slot - 1;
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 3394c4d34ea4..adc1dde34bfe 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6205,7 +6205,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	struct sctp_datahdr *data_hdr;
 	struct sctp_chunk *err;
 	size_t datalen;
-	sctp_verb_t deliver;
+	enum sctp_verb deliver;
 	int tmp;
 	__u32 tsn;
 	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
-- 
cgit v1.2.3


From eb662a6a9b2a4ee3c76bbfc4c23f4dc56859b0f3 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:55 +0800
Subject: sctp: remove the unused typedef sctp_sm_command_t

Remove this typedef including the struct, there is even no places
using it.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 9af64b98d96b..3ca75a6c4b5e 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -65,11 +65,6 @@ typedef enum {
 	SCTP_DISPOSITION_BUG,		 /* This is a bug.  */
 } sctp_disposition_t;
 
-typedef struct {
-	int name;
-	int action;
-} sctp_sm_command_t;
-
 typedef sctp_disposition_t (sctp_state_fn_t) (struct net *,
 					      const struct sctp_endpoint *,
 					      const struct sctp_association *,
-- 
cgit v1.2.3


From 8ee821aea39c6bf4142c9319adecea6d3e1af4a2 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:56 +0800
Subject: sctp: remove the typedef sctp_sm_table_entry_t

This patch is to remove the typedef sctp_sm_table_entry_t, and
replace with struct sctp_sm_table_entry in the places where it's
using this typedef.

It is also to fix some indents.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h    |  6 +++---
 net/sctp/sm_sideeffect.c |  2 +-
 net/sctp/sm_statetable.c | 42 +++++++++++++++++++++++++-----------------
 3 files changed, 29 insertions(+), 21 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 3ca75a6c4b5e..7ad240228a0f 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -72,10 +72,10 @@ typedef sctp_disposition_t (sctp_state_fn_t) (struct net *,
 					      void *arg,
 					      struct sctp_cmd_seq *);
 typedef void (sctp_timer_event_t) (unsigned long);
-typedef struct {
+struct sctp_sm_table_entry {
 	sctp_state_fn_t *fn;
 	const char *name;
-} sctp_sm_table_entry_t;
+};
 
 /* A naming convention of "sctp_sf_xxx" applies to all the state functions
  * currently in use.
@@ -170,7 +170,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire;
 
 /* Prototypes for utility support functions.  */
 __u8 sctp_get_chunk_type(struct sctp_chunk *chunk);
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(
+const struct sctp_sm_table_entry *sctp_sm_lookup_event(
 					struct net *net,
 					enum sctp_event event_type,
 					enum sctp_state state,
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 6dd5934cbda6..2bc1204becbd 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1146,7 +1146,7 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type,
 	       void *event_arg, gfp_t gfp)
 {
 	struct sctp_cmd_seq commands;
-	const sctp_sm_table_entry_t *state_fn;
+	const struct sctp_sm_table_entry *state_fn;
 	sctp_disposition_t status;
 	int error = 0;
 	typedef const char *(printfn_t)(union sctp_subtype);
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index d437f3801399..79b6bee5b768 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -45,27 +45,27 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
 primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
 other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
 timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(
+static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
 						struct net *net,
 						enum sctp_cid cid,
 						enum sctp_state state);
 
 
-static const sctp_sm_table_entry_t bug = {
+static const struct sctp_sm_table_entry bug = {
 	.fn = sctp_sf_bug,
 	.name = "sctp_sf_bug"
 };
 
 #define DO_LOOKUP(_max, _type, _table)					\
 ({									\
-	const sctp_sm_table_entry_t *rtn;				\
+	const struct sctp_sm_table_entry *rtn;				\
 									\
 	if ((event_subtype._type > (_max))) {				\
 		pr_warn("table %p possible attack: event %d exceeds max %d\n", \
@@ -77,7 +77,7 @@ static const sctp_sm_table_entry_t bug = {
 	rtn;								\
 })
 
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(
+const struct sctp_sm_table_entry *sctp_sm_lookup_event(
 					struct net *net,
 					enum sctp_event event_type,
 					enum sctp_state state,
@@ -394,7 +394,8 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(
  *
  * For base protocol (RFC 2960).
  */
-static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_DATA,
 	TYPE_SCTP_INIT,
 	TYPE_SCTP_INIT_ACK,
@@ -453,7 +454,8 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
 /* The primary index for this table is the chunk type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_ASCONF,
 	TYPE_SCTP_ASCONF_ACK,
 }; /*state_fn_t addip_chunk_event_table[][] */
@@ -480,7 +482,8 @@ static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_
 /* The primary index for this table is the chunk type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_FWD_TSN,
 }; /*state_fn_t prsctp_chunk_event_table[][] */
 
@@ -506,7 +509,8 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN
 /* The primary index for this table is the chunk type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_RECONF,
 }; /*state_fn_t reconf_chunk_event_table[][] */
 
@@ -532,11 +536,12 @@ static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUN
 /* The primary index for this table is the chunk type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_AUTH,
 }; /*state_fn_t auth_chunk_event_table[][] */
 
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
 chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
 	/* SCTP_STATE_CLOSED */
 	TYPE_SCTP_FUNC(sctp_sf_ootb),
@@ -693,7 +698,8 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
 /* The primary index for this table is the primitive type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_PRIMITIVE_ASSOCIATE,
 	TYPE_SCTP_PRIMITIVE_SHUTDOWN,
 	TYPE_SCTP_PRIMITIVE_ABORT,
@@ -741,7 +747,8 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE
 	TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
 }
 
-static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_OTHER_NO_PENDING_TSN,
 	TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH,
 };
@@ -955,7 +962,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
 	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
 }
 
-static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_EVENT_TIMEOUT_NONE,
 	TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE,
 	TYPE_SCTP_EVENT_TIMEOUT_T1_INIT,
@@ -969,7 +977,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
 	TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
 };
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(
+static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
 						struct net *net,
 						enum sctp_cid cid,
 						enum sctp_state state)
-- 
cgit v1.2.3


From 172a1599ba88df7147f6503a75686fb89c8a1f3f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:57 +0800
Subject: sctp: remove the typedef sctp_disposition_t

This patch is to remove the typedef sctp_disposition_t, and
replace with enum sctp_disposition in the places where it's
using this typedef.

It's also to fix the indent for many functions' defination.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h    |   19 +-
 net/sctp/probe.c         |   13 +-
 net/sctp/sm_sideeffect.c |   50 +-
 net/sctp/sm_statefuns.c  | 1316 ++++++++++++++++++++++++----------------------
 4 files changed, 717 insertions(+), 681 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 7ad240228a0f..33077f317995 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -53,7 +53,7 @@
 /*
  * Possible values for the disposition are:
  */
-typedef enum {
+enum sctp_disposition {
 	SCTP_DISPOSITION_DISCARD,	 /* No further processing.  */
 	SCTP_DISPOSITION_CONSUME,	 /* Process return values normally.  */
 	SCTP_DISPOSITION_NOMEM,		 /* We ran out of memory--recover.  */
@@ -63,14 +63,15 @@ typedef enum {
 	SCTP_DISPOSITION_NOT_IMPL,	 /* This entry is not implemented.  */
 	SCTP_DISPOSITION_ERROR,		 /* This is plain old user error.  */
 	SCTP_DISPOSITION_BUG,		 /* This is a bug.  */
-} sctp_disposition_t;
-
-typedef sctp_disposition_t (sctp_state_fn_t) (struct net *,
-					      const struct sctp_endpoint *,
-					      const struct sctp_association *,
-					      const union sctp_subtype type,
-					      void *arg,
-					      struct sctp_cmd_seq *);
+};
+
+typedef enum sctp_disposition (sctp_state_fn_t) (
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
 typedef void (sctp_timer_event_t) (unsigned long);
 struct sctp_sm_table_entry {
 	sctp_state_fn_t *fn;
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 34097a167431..1280f85a598d 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -127,12 +127,13 @@ static const struct file_operations sctpprobe_fops = {
 	.llseek = noop_llseek,
 };
 
-static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
-					    const struct sctp_endpoint *ep,
-					    const struct sctp_association *asoc,
-					    const union sctp_subtype type,
-					    void *arg,
-					    struct sctp_cmd_seq *commands)
+static enum sctp_disposition jsctp_sf_eat_sack(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 2bc1204becbd..e6a2974e020e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -57,7 +57,7 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
 				void *event_arg,
-				sctp_disposition_t status,
+				enum sctp_disposition status,
 				struct sctp_cmd_seq *commands,
 				gfp_t gfp);
 static int sctp_side_effects(enum sctp_event event_type,
@@ -66,7 +66,7 @@ static int sctp_side_effects(enum sctp_event event_type,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
 			     void *event_arg,
-			     sctp_disposition_t status,
+			     enum sctp_disposition status,
 			     struct sctp_cmd_seq *commands,
 			     gfp_t gfp);
 
@@ -97,8 +97,8 @@ static void sctp_do_ecn_ce_work(struct sctp_association *asoc,
  * that was originally marked with the CE bit.
  */
 static struct sctp_chunk *sctp_do_ecn_ecne_work(struct sctp_association *asoc,
-					   __u32 lowest_tsn,
-					   struct sctp_chunk *chunk)
+						__u32 lowest_tsn,
+						struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *repl;
 
@@ -152,9 +152,9 @@ static void sctp_do_ecn_cwr_work(struct sctp_association *asoc,
 static int sctp_gen_sack(struct sctp_association *asoc, int force,
 			 struct sctp_cmd_seq *commands)
 {
+	struct sctp_transport *trans = asoc->peer.last_data_from;
 	__u32 ctsn, max_tsn_seen;
 	struct sctp_chunk *sack;
-	struct sctp_transport *trans = asoc->peer.last_data_from;
 	int error = 0;
 
 	if (force ||
@@ -244,11 +244,11 @@ nomem:
  */
 void sctp_generate_t3_rtx_event(unsigned long peer)
 {
-	int error;
 	struct sctp_transport *transport = (struct sctp_transport *) peer;
 	struct sctp_association *asoc = transport->asoc;
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
+	int error;
 
 	/* Check whether a task is in the sock.  */
 
@@ -361,12 +361,12 @@ static void sctp_generate_autoclose_event(unsigned long data)
  */
 void sctp_generate_heartbeat_event(unsigned long data)
 {
-	int error = 0;
 	struct sctp_transport *transport = (struct sctp_transport *) data;
 	struct sctp_association *asoc = transport->asoc;
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
 	u32 elapsed, timeout;
+	int error = 0;
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -406,7 +406,7 @@ out_unlock:
  */
 void sctp_generate_proto_unreach_event(unsigned long data)
 {
-	struct sctp_transport *transport = (struct sctp_transport *) data;
+	struct sctp_transport *transport = (struct sctp_transport *)data;
 	struct sctp_association *asoc = transport->asoc;
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
@@ -472,7 +472,7 @@ out_unlock:
 /* Inject a SACK Timeout event into the state machine.  */
 static void sctp_generate_sack_event(unsigned long data)
 {
-	struct sctp_association *asoc = (struct sctp_association *) data;
+	struct sctp_association *asoc = (struct sctp_association *)data;
 	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK);
 }
 
@@ -610,6 +610,7 @@ static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
 {
 	struct sctp_ulpevent *event;
 	struct sctp_chunk *abort;
+
 	/* Cancel any partial delivery in progress. */
 	sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
 
@@ -991,6 +992,7 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
 				    struct sctp_chunk *chunk)
 {
 	struct sctp_fwdtsn_skip *skip;
+
 	/* Walk through all the skipped SSNs */
 	sctp_walk_fwdtsn(skip, chunk) {
 		sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
@@ -1003,8 +1005,8 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
 static void sctp_cmd_del_non_primary(struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
-	struct list_head *pos;
 	struct list_head *temp;
+	struct list_head *pos;
 
 	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
 		t = list_entry(pos, struct sctp_transport, transports);
@@ -1145,15 +1147,15 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type,
 	       struct sctp_endpoint *ep, struct sctp_association *asoc,
 	       void *event_arg, gfp_t gfp)
 {
-	struct sctp_cmd_seq commands;
-	const struct sctp_sm_table_entry *state_fn;
-	sctp_disposition_t status;
-	int error = 0;
 	typedef const char *(printfn_t)(union sctp_subtype);
 	static printfn_t *table[] = {
 		NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname,
 	};
 	printfn_t *debug_fn  __attribute__ ((unused)) = table[event_type];
+	const struct sctp_sm_table_entry *state_fn;
+	struct sctp_cmd_seq commands;
+	enum sctp_disposition status;
+	int error = 0;
 
 	/* Look up the state function, run it, and then process the
 	 * side effects.  These three steps are the heart of lksctp.
@@ -1183,7 +1185,7 @@ static int sctp_side_effects(enum sctp_event event_type,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
 			     void *event_arg,
-			     sctp_disposition_t status,
+			     enum sctp_disposition status,
 			     struct sctp_cmd_seq *commands,
 			     gfp_t gfp)
 {
@@ -1269,23 +1271,21 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
 				void *event_arg,
-				sctp_disposition_t status,
+				enum sctp_disposition status,
 				struct sctp_cmd_seq *commands,
 				gfp_t gfp)
 {
-	struct sock *sk = ep->base.sk;
-	struct sctp_sock *sp = sctp_sk(sk);
-	int error = 0;
-	int force;
-	struct sctp_cmd *cmd;
-	struct sctp_chunk *new_obj;
-	struct sctp_chunk *chunk = NULL;
+	struct sctp_sock *sp = sctp_sk(ep->base.sk);
+	struct sctp_chunk *chunk = NULL, *new_obj;
 	struct sctp_packet *packet;
+	struct sctp_sackhdr sackh;
 	struct timer_list *timer;
-	unsigned long timeout;
 	struct sctp_transport *t;
-	struct sctp_sackhdr sackh;
+	unsigned long timeout;
+	struct sctp_cmd *cmd;
 	int local_cork = 0;
+	int error = 0;
+	int force;
 
 	if (SCTP_EVENT_T_TIMEOUT != event_type)
 		chunk = event_arg;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index adc1dde34bfe..8f8ccded13e4 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -59,37 +59,41 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/structs.h>
 
-static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
-				  const struct sctp_endpoint *ep,
-				  const struct sctp_association *asoc,
-				  struct sctp_chunk *chunk,
-				  const void *payload,
-				  size_t paylen);
+static struct sctp_packet *sctp_abort_pkt_new(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk,
+					const void *payload, size_t paylen);
 static int sctp_eat_data(const struct sctp_association *asoc,
 			 struct sctp_chunk *chunk,
 			 struct sctp_cmd_seq *commands);
-static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
-					     const struct sctp_association *asoc,
-					     const struct sctp_chunk *chunk);
+static struct sctp_packet *sctp_ootb_pkt_new(
+					struct net *net,
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk);
 static void sctp_send_stale_cookie_err(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const struct sctp_chunk *chunk,
 				       struct sctp_cmd_seq *commands,
 				       struct sctp_chunk *err_chunk);
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
-						 const struct sctp_endpoint *ep,
-						 const struct sctp_association *asoc,
-						 const union sctp_subtype type,
-						 void *arg,
-						 struct sctp_cmd_seq *commands);
-static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
-					     const struct sctp_endpoint *ep,
-					     const struct sctp_association *asoc,
-					     const union sctp_subtype type,
-					     void *arg,
-					     struct sctp_cmd_seq *commands);
-static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+static enum sctp_disposition sctp_sf_do_5_2_6_stale(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_shut_8_4_5(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_tabort_8_4_8(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
@@ -97,61 +101,63 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
 					struct sctp_cmd_seq *commands);
 static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
 
-static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
-					   struct sctp_cmd_seq *commands,
-					   __be16 error, int sk_err,
-					   const struct sctp_association *asoc,
-					   struct sctp_transport *transport);
+static enum sctp_disposition sctp_stop_t1_and_abort(
+					struct net *net,
+					struct sctp_cmd_seq *commands,
+					__be16 error, int sk_err,
+					const struct sctp_association *asoc,
+					struct sctp_transport *transport);
 
-static sctp_disposition_t sctp_sf_abort_violation(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     void *arg,
-				     struct sctp_cmd_seq *commands,
-				     const __u8 *payload,
-				     const size_t paylen);
+static enum sctp_disposition sctp_sf_abort_violation(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					void *arg,
+					struct sctp_cmd_seq *commands,
+					const __u8 *payload,
+					const size_t paylen);
 
-static sctp_disposition_t sctp_sf_violation_chunklen(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_violation_chunklen(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
 
-static sctp_disposition_t sctp_sf_violation_paramlen(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg, void *ext,
-				     struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_violation_paramlen(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg, void *ext,
+					struct sctp_cmd_seq *commands);
 
-static sctp_disposition_t sctp_sf_violation_ctsn(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_violation_ctsn(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
 
-static sctp_disposition_t sctp_sf_violation_chunk(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_violation_chunk(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
 
 static enum sctp_ierror sctp_sf_authenticate(
-				struct net *net,
-				const struct sctp_endpoint *ep,
-				const struct sctp_association *asoc,
-				const union sctp_subtype type,
-				struct sctp_chunk *chunk);
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					struct sctp_chunk *chunk);
 
-static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+static enum sctp_disposition __sctp_sf_do_9_1_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
@@ -165,8 +171,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
  * 		   false = Invalid length
  *
  */
-static inline bool
-sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
+static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk,
+					   __u16 required_length)
 {
 	__u16 chunk_length = ntohs(chunk->chunk_hdr->length);
 
@@ -214,12 +220,11 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_4_C(struct net *net,
-				  const struct sctp_endpoint *ep,
-				  const struct sctp_association *asoc,
-				  const union sctp_subtype type,
-				  void *arg,
-				  struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_4_C(struct net *net,
+				     const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const union sctp_subtype type,
+				     void *arg, struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
@@ -300,12 +305,12 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg, *repl, *err_chunk;
 	struct sctp_unrecognized_param *unk_param;
@@ -494,15 +499,15 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const union sctp_subtype type,
-				       void *arg,
-				       struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
 	struct sctp_init_chunk *initchunk;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
 
@@ -644,20 +649,21 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
-				      const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const union sctp_subtype type, void *arg,
-				      struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
+					 const struct sctp_endpoint *ep,
+					 const struct sctp_association *asoc,
+					 const union sctp_subtype type,
+					 void *arg,
+					 struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
+	struct sctp_ulpevent *ev, *ai_ev = NULL;
 	struct sctp_association *new_asoc;
 	struct sctp_init_chunk *peer_init;
-	struct sctp_chunk *repl;
-	struct sctp_ulpevent *ev, *ai_ev = NULL;
-	int error = 0;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *err_chk_p;
+	struct sctp_chunk *repl;
 	struct sock *sk;
+	int error = 0;
 
 	/* If the packet is an OOTB packet which is temporarily on the
 	 * control endpoint, respond with an ABORT.
@@ -871,11 +877,12 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
-				      const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const union sctp_subtype type, void *arg,
-				      struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
+					 const struct sctp_endpoint *ep,
+					 const struct sctp_association *asoc,
+					 const union sctp_subtype type,
+					 void *arg,
+					 struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
@@ -949,11 +956,12 @@ nomem:
 }
 
 /* Generate and sendout a heartbeat packet.  */
-static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
-					    const struct sctp_association *asoc,
-					    const union sctp_subtype type,
-					    void *arg,
-					    struct sctp_cmd_seq *commands)
+static enum sctp_disposition sctp_sf_heartbeat(
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = (struct sctp_transport *) arg;
 	struct sctp_chunk *reply;
@@ -974,12 +982,12 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
 }
 
 /* Generate a HEARTBEAT packet on the given transport.  */
-sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_sendbeat_8_3(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = (struct sctp_transport *) arg;
 
@@ -1022,11 +1030,12 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
 }
 
 /* resend asoc strreset_chunk.  */
-sctp_disposition_t sctp_sf_send_reconf(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const union sctp_subtype type, void *arg,
-				       struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_send_reconf(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = arg;
 
@@ -1073,12 +1082,11 @@ sctp_disposition_t sctp_sf_send_reconf(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const union sctp_subtype type,
-				    void *arg,
-				    struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_beat_8_3(struct net *net,
+				       const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const union sctp_subtype type,
+				       void *arg, struct sctp_cmd_seq *commands)
 {
 	struct sctp_paramhdr *param_hdr;
 	struct sctp_chunk *chunk = arg;
@@ -1148,12 +1156,12 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_sender_hb_info *hbinfo;
 	struct sctp_chunk *chunk = arg;
@@ -1227,13 +1235,13 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 				      struct sctp_chunk *init,
 				      struct sctp_cmd_seq *commands)
 {
-	int len;
-	struct sctp_packet *pkt;
+	struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
 	union sctp_addr_param *addrparm;
 	struct sctp_errhdr *errhdr;
-	struct sctp_endpoint *ep;
 	char buffer[sizeof(*errhdr) + sizeof(*addrparm)];
-	struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
+	struct sctp_endpoint *ep;
+	struct sctp_packet *pkt;
+	int len;
 
 	/* Build the error on the stack.   We are way to malloc crazy
 	 * throughout the code today.
@@ -1410,18 +1418,19 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc,
 /* Common helper routine for both duplicate and simulataneous INIT
  * chunk handling.
  */
-static sctp_disposition_t sctp_sf_do_unexpected_init(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg, struct sctp_cmd_seq *commands)
+static enum sctp_disposition sctp_sf_do_unexpected_init(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg, *repl, *err_chunk;
 	struct sctp_unrecognized_param *unk_param;
 	struct sctp_association *new_asoc;
+	enum sctp_disposition retval;
 	struct sctp_packet *packet;
-	sctp_disposition_t retval;
 	int len;
 
 	/* 6.10 Bundling
@@ -1622,12 +1631,13 @@ cleanup:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const union sctp_subtype type,
-				    void *arg,
-				    struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_5_2_1_siminit(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* Call helper to do the real work for both simulataneous and
 	 * duplicate INIT chunk handling.
@@ -1676,7 +1686,8 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
+enum sctp_disposition sctp_sf_do_5_2_2_dupinit(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
@@ -1699,12 +1710,13 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
  * An unexpected INIT ACK usually indicates the processing of an old or
  * duplicated INIT chunk.
 */
-sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
-					    const struct sctp_endpoint *ep,
-					    const struct sctp_association *asoc,
-					    const union sctp_subtype type,
-					    void *arg,
-					    struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_5_2_3_initack(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* Per the above section, we'll discard the chunk if we have an
 	 * endpoint.  If this is an OOTB INIT-ACK, treat it as such.
@@ -1720,7 +1732,8 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
  * Section 5.2.4
  *  A)  In this case, the peer may have restarted.
  */
-static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_a(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
@@ -1728,10 +1741,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
 					struct sctp_association *new_asoc)
 {
 	struct sctp_init_chunk *peer_init;
+	enum sctp_disposition disposition;
 	struct sctp_ulpevent *ev;
 	struct sctp_chunk *repl;
 	struct sctp_chunk *err;
-	sctp_disposition_t disposition;
 
 	/* new_asoc is a brand-new association, so these are not yet
 	 * side effects--it is safe to run them here.
@@ -1835,7 +1848,8 @@ nomem:
  *      after responding to the local endpoint's INIT
  */
 /* This case represents an initialization collision.  */
-static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_b(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
@@ -1906,7 +1920,8 @@ nomem:
  *     but a new tag of its own.
  */
 /* This case represents an initialization collision.  */
-static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_c(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
@@ -1928,7 +1943,8 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
  *    enter the ESTABLISHED state, if it has not already done so.
  */
 /* This case represents an initialization collision.  */
-static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_d(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
@@ -2023,19 +2039,20 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
+enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
 					struct sctp_cmd_seq *commands)
 {
-	sctp_disposition_t retval;
-	struct sctp_chunk *chunk = arg;
 	struct sctp_association *new_asoc;
+	struct sctp_chunk *chunk = arg;
+	enum sctp_disposition retval;
+	struct sctp_chunk *err_chk_p;
 	int error = 0;
 	char action;
-	struct sctp_chunk *err_chk_p;
 
 	/* Make sure that the chunk has a valid length from the protocol
 	 * perspective.  In this case check to make sure we have at least
@@ -2141,13 +2158,13 @@ nomem:
  *
  * See sctp_sf_do_9_1_abort().
  */
-sctp_disposition_t sctp_sf_shutdown_pending_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_shutdown_pending_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -2184,7 +2201,8 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
  *
  * See sctp_sf_do_9_1_abort().
  */
-sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
+enum sctp_disposition sctp_sf_shutdown_sent_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
@@ -2234,13 +2252,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
  *
  * See sctp_sf_do_9_1_abort().
  */
-sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_shutdown_ack_sent_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* The same T2 timer, so we should be able to use
 	 * common function with the SHUTDOWN-SENT state.
@@ -2262,7 +2280,8 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
+enum sctp_disposition sctp_sf_cookie_echoed_err(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
@@ -2326,12 +2345,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
-						 const struct sctp_endpoint *ep,
-						 const struct sctp_association *asoc,
-						 const union sctp_subtype type,
-						 void *arg,
-						 struct sctp_cmd_seq *commands)
+static enum sctp_disposition sctp_sf_do_5_2_6_stale(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	int attempts = asoc->init_err_counter + 1;
 	struct sctp_chunk *chunk = arg, *reply;
@@ -2448,7 +2468,8 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
+enum sctp_disposition sctp_sf_do_9_1_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
@@ -2485,16 +2506,17 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
 	return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
 }
 
-static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+static enum sctp_disposition __sctp_sf_do_9_1_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
 					void *arg,
 					struct sctp_cmd_seq *commands)
 {
+	__be16 error = SCTP_ERROR_NO_ERROR;
 	struct sctp_chunk *chunk = arg;
 	unsigned int len;
-	__be16 error = SCTP_ERROR_NO_ERROR;
 
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
@@ -2523,16 +2545,17 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
  *
  * See sctp_sf_do_9_1_abort() above.
  */
-sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_cookie_wait_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
+	__be16 error = SCTP_ERROR_NO_ERROR;
 	struct sctp_chunk *chunk = arg;
 	unsigned int len;
-	__be16 error = SCTP_ERROR_NO_ERROR;
 
 	if (!sctp_vtag_verify_either(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2562,7 +2585,8 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 /*
  * Process an incoming ICMP as an ABORT.  (COOKIE-WAIT state)
  */
-sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
+enum sctp_disposition sctp_sf_cookie_wait_icmp_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
@@ -2577,12 +2601,13 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
 /*
  * Process an ABORT.  (COOKIE-ECHOED state)
  */
-sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
-					       const struct sctp_endpoint *ep,
-					       const struct sctp_association *asoc,
-					       const union sctp_subtype type,
-					       void *arg,
-					       struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
@@ -2595,11 +2620,12 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
  *
  * This is common code called by several sctp_sf_*_abort() functions above.
  */
-static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
-					   struct sctp_cmd_seq *commands,
-					   __be16 error, int sk_err,
-					   const struct sctp_association *asoc,
-					   struct sctp_transport *transport)
+static enum sctp_disposition sctp_stop_t1_and_abort(
+					struct net *net,
+					struct sctp_cmd_seq *commands,
+					__be16 error, int sk_err,
+					const struct sctp_association *asoc,
+					struct sctp_transport *transport)
 {
 	pr_debug("%s: ABORT received (INIT)\n", __func__);
 
@@ -2649,15 +2675,16 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const union sctp_subtype type,
-					   void *arg,
-					   struct sctp_cmd_seq *commands)
-{
+enum sctp_disposition sctp_sf_do_9_2_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
+{
+	enum sctp_disposition disposition;
 	struct sctp_chunk *chunk = arg;
-	sctp_disposition_t disposition;
 	struct sctp_shutdownhdr *sdh;
 	struct sctp_ulpevent *ev;
 	__u32 ctsn;
@@ -2738,12 +2765,13 @@ out:
  * The Cumulative TSN Ack of the received SHUTDOWN chunk
  * MUST be processed.
  */
-sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const union sctp_subtype type,
-					   void *arg,
-					   struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_9_2_shut_ctsn(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_shutdownhdr *sdh;
@@ -2791,14 +2819,15 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
  * that belong to this association, it should discard the INIT chunk and
  * retransmit the SHUTDOWN ACK chunk.
  */
-sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const union sctp_subtype type,
-				    void *arg,
-				    struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_9_2_reshutack(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
 
 	/* Make sure that the chunk has a valid length */
@@ -2855,12 +2884,12 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
-				      const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const union sctp_subtype type,
-				      void *arg,
-				      struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_ecn_cwr(struct net *net,
+					 const struct sctp_endpoint *ep,
+					 const struct sctp_association *asoc,
+					 const union sctp_subtype type,
+					 void *arg,
+					 struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_cwrhdr *cwr;
@@ -2911,12 +2940,11 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_ecne(struct net *net,
-				   const struct sctp_endpoint *ep,
-				   const struct sctp_association *asoc,
-				   const union sctp_subtype type,
-				   void *arg,
-				   struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_ecne(struct net *net,
+				      const struct sctp_endpoint *ep,
+				      const struct sctp_association *asoc,
+				      const union sctp_subtype type,
+				      void *arg, struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ecnehdr *ecne;
@@ -2968,12 +2996,12 @@ sctp_disposition_t sctp_sf_do_ecne(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	union sctp_arg force = SCTP_NOFORCE();
 	struct sctp_chunk *chunk = arg;
@@ -3088,12 +3116,13 @@ discard_noforce:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_eat_data_fast_4_4(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	int error;
@@ -3179,12 +3208,12 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_sackhdr *sackh;
@@ -3253,7 +3282,8 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
  *
  * The return value is the disposition of the chunk.
 */
-static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+static enum sctp_disposition sctp_sf_tabort_8_4_8(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const union sctp_subtype type,
@@ -3303,12 +3333,12 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
  *
  * The return value is the disposition of the chunk.
 */
-sctp_disposition_t sctp_sf_operr_notify(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_operr_notify(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_errhdr *err;
@@ -3341,12 +3371,12 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net,
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_9_2_final(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
@@ -3424,20 +3454,19 @@ nomem:
  *    receiver of the OOTB packet shall discard the OOTB packet and take
  *    no further action.
  */
-sctp_disposition_t sctp_sf_ootb(struct net *net,
-				const struct sctp_endpoint *ep,
-				const struct sctp_association *asoc,
-				const union sctp_subtype type,
-				void *arg,
-				struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_ootb(struct net *net,
+				   const struct sctp_endpoint *ep,
+				   const struct sctp_association *asoc,
+				   const union sctp_subtype type,
+				   void *arg, struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
 	struct sctp_chunkhdr *ch;
 	struct sctp_errhdr *err;
-	__u8 *ch_end;
-	int ootb_shut_ack = 0;
 	int ootb_cookie_ack = 0;
+	int ootb_shut_ack = 0;
+	__u8 *ch_end;
 
 	SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 
@@ -3513,16 +3542,17 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
  * (endpoint, asoc, type, arg, commands)
  *
  * Outputs
- * (sctp_disposition_t)
+ * (enum sctp_disposition)
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
-					     const struct sctp_endpoint *ep,
-					     const struct sctp_association *asoc,
-					     const union sctp_subtype type,
-					     void *arg,
-					     struct sctp_cmd_seq *commands)
+static enum sctp_disposition sctp_sf_shut_8_4_5(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk = arg;
@@ -3579,12 +3609,12 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
  *   chunks. --piggy ]
  *
  */
-sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
-				      const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const union sctp_subtype type,
-				      void *arg,
-				      struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -3604,17 +3634,18 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
 }
 
 /* ADDIP Section 4.2 Upon reception of an ASCONF Chunk.  */
-sctp_disposition_t sctp_sf_do_asconf(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type, void *arg,
-				     struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_asconf(struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk	*chunk = arg;
-	struct sctp_chunk	*asconf_ack = NULL;
-	struct sctp_paramhdr	*err_param = NULL;
-	struct sctp_addiphdr	*hdr;
-	__u32			serial;
+	struct sctp_paramhdr *err_param = NULL;
+	struct sctp_chunk *asconf_ack = NULL;
+	struct sctp_chunk *chunk = arg;
+	struct sctp_addiphdr *hdr;
+	__u32 serial;
 
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3721,19 +3752,19 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
  * When building TLV parameters for the ASCONF Chunk that will add or
  * delete IP addresses the D0 to D13 rules should be applied:
  */
-sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
-					 const struct sctp_endpoint *ep,
-					 const struct sctp_association *asoc,
-					 const union sctp_subtype type,
-					 void *arg,
-					 struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk	*asconf_ack = arg;
-	struct sctp_chunk	*last_asconf = asoc->addip_last_asconf;
-	struct sctp_chunk	*abort;
-	struct sctp_paramhdr	*err_param = NULL;
-	struct sctp_addiphdr	*addip_hdr;
-	__u32			sent_serial, rcvd_serial;
+	struct sctp_chunk *last_asconf = asoc->addip_last_asconf;
+	struct sctp_paramhdr *err_param = NULL;
+	struct sctp_chunk *asconf_ack = arg;
+	struct sctp_addiphdr *addip_hdr;
+	__u32 sent_serial, rcvd_serial;
+	struct sctp_chunk *abort;
 
 	if (!sctp_vtag_verify(asconf_ack, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3840,11 +3871,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 }
 
 /* RE-CONFIG Section 5.2 Upon reception of an RECONF Chunk. */
-sctp_disposition_t sctp_sf_do_reconf(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type, void *arg,
-				     struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_reconf(struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_paramhdr *err_param = NULL;
 	struct sctp_chunk *chunk = arg;
@@ -3916,15 +3948,15 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const union sctp_subtype type,
-				       void *arg,
-				       struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_skip *skip;
 	__u16 len;
 	__u32 tsn;
@@ -3986,16 +4018,16 @@ discard_noforce:
 	return SCTP_DISPOSITION_DISCARD;
 }
 
-sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_skip *skip;
 	__u16 len;
 	__u32 tsn;
@@ -4079,18 +4111,17 @@ gen_shutdown:
  * The return value is the disposition of the chunk.
  */
 static enum sctp_ierror sctp_sf_authenticate(
-				struct net *net,
-				const struct sctp_endpoint *ep,
-				const struct sctp_association *asoc,
-				const union sctp_subtype type,
-				struct sctp_chunk *chunk)
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					struct sctp_chunk *chunk)
 {
 	struct sctp_authhdr *auth_hdr;
+	__u8 *save_digest, *digest;
 	struct sctp_hmac *hmac;
 	unsigned int sig_len;
 	__u16 key_id;
-	__u8 *save_digest;
-	__u8 *digest;
 
 	/* Pull in the auth header, so we can do some more verification */
 	auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
@@ -4154,12 +4185,11 @@ nomem:
 	return SCTP_IERROR_NOMEM;
 }
 
-sctp_disposition_t sctp_sf_eat_auth(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const union sctp_subtype type,
-				    void *arg,
-				    struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_eat_auth(struct net *net,
+				       const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const union sctp_subtype type,
+				       void *arg, struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_authhdr *auth_hdr;
@@ -4251,12 +4281,12 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_unk_chunk(struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *unk_chunk = arg;
 	struct sctp_chunk *err_chunk;
@@ -4331,12 +4361,12 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
-					 const struct sctp_endpoint *ep,
-					 const struct sctp_association *asoc,
-					 const union sctp_subtype type,
-					 void *arg,
-					 struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_discard_chunk(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -4371,12 +4401,11 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_pdiscard(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const union sctp_subtype type,
-				    void *arg,
-				    struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_pdiscard(struct net *net,
+				       const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const union sctp_subtype type,
+				       void *arg, struct sctp_cmd_seq *commands)
 {
 	SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
@@ -4399,12 +4428,12 @@ sctp_disposition_t sctp_sf_pdiscard(struct net *net,
  * We simply tag the chunk as a violation.  The state machine will log
  * the violation and continue.
  */
-sctp_disposition_t sctp_sf_violation(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_violation(struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -4419,14 +4448,14 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
 /*
  * Common function to handle a protocol violation.
  */
-static sctp_disposition_t sctp_sf_abort_violation(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     void *arg,
-				     struct sctp_cmd_seq *commands,
-				     const __u8 *payload,
-				     const size_t paylen)
+static enum sctp_disposition sctp_sf_abort_violation(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					void *arg,
+					struct sctp_cmd_seq *commands,
+					const __u8 *payload,
+					const size_t paylen)
 {
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk =  arg;
@@ -4536,18 +4565,18 @@ nomem:
  *
  * Generate an  ABORT chunk and terminate the association.
  */
-static sctp_disposition_t sctp_sf_violation_chunklen(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands)
+static enum sctp_disposition sctp_sf_violation_chunklen(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	static const char err_str[] = "The following chunk had invalid length:";
 
 	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
-					sizeof(err_str));
+				       sizeof(err_str));
 }
 
 /*
@@ -4556,17 +4585,17 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
  * or accumulated length in multi parameters exceeds the end of the chunk,
  * the length is considered as invalid.
  */
-static sctp_disposition_t sctp_sf_violation_paramlen(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg, void *ext,
-				     struct sctp_cmd_seq *commands)
+static enum sctp_disposition sctp_sf_violation_paramlen(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg, void *ext,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk =  arg;
 	struct sctp_paramhdr *param = ext;
 	struct sctp_chunk *abort = NULL;
+	struct sctp_chunk *chunk = arg;
 
 	if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
 		goto discard;
@@ -4599,18 +4628,18 @@ nomem:
  * We inform the other end by sending an ABORT with a Protocol Violation
  * error code.
  */
-static sctp_disposition_t sctp_sf_violation_ctsn(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands)
+static enum sctp_disposition sctp_sf_violation_ctsn(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	static const char err_str[] = "The cumulative tsn ack beyond the max tsn currently sent:";
 
 	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
-					sizeof(err_str));
+				       sizeof(err_str));
 }
 
 /* Handle protocol violation of an invalid chunk bundling.  For example,
@@ -4619,13 +4648,13 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
  * statement from the specs.  Additionally, there might be an attacker
  * on the path and we may not want to continue this communication.
  */
-static sctp_disposition_t sctp_sf_violation_chunk(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const union sctp_subtype type,
-				     void *arg,
-				     struct sctp_cmd_seq *commands)
+static enum sctp_disposition sctp_sf_violation_chunk(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	static const char err_str[] = "The following chunk violates protocol:";
 
@@ -4633,7 +4662,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 		return sctp_sf_violation(net, ep, asoc, type, arg, commands);
 
 	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
-					sizeof(err_str));
+				       sizeof(err_str));
 }
 /***************************************************************************
  * These are the state functions for handling primitive (Section 10) events.
@@ -4695,15 +4724,15 @@ static sctp_disposition_t sctp_sf_violation_chunk(
  *
  * The return value is a disposition.
  */
-sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const union sctp_subtype type,
-				       void *arg,
-				       struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_prm_asoc(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *repl;
 	struct sctp_association *my_asoc;
+	struct sctp_chunk *repl;
 
 	/* The comment below says that we enter COOKIE-WAIT AFTER
 	 * sending the INIT, but that doesn't actually work in our
@@ -4807,12 +4836,12 @@ nomem:
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const union sctp_subtype type,
-				       void *arg,
-				       struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_prm_send(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
 	struct sctp_datamsg *msg = arg;
 
@@ -4846,15 +4875,15 @@ sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_9_2_prm_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	int disposition;
+	enum sctp_disposition disposition;
 
 	/* From 9.2 Shutdown of an Association
 	 * Upon receipt of the SHUTDOWN primitive from its upper
@@ -4872,6 +4901,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
 		disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
 							    arg, commands);
 	}
+
 	return disposition;
 }
 
@@ -4902,13 +4932,13 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_1_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_9_1_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* From 9.1 Abort of an Association
 	 * Upon receipt of the ABORT primitive from its upper
@@ -4940,12 +4970,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 }
 
 /* We tried an illegal operation on an association which is closed.  */
-sctp_disposition_t sctp_sf_error_closed(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_error_closed(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR, SCTP_ERROR(-EINVAL));
 	return SCTP_DISPOSITION_CONSUME;
@@ -4954,12 +4984,13 @@ sctp_disposition_t sctp_sf_error_closed(struct net *net,
 /* We tried an illegal operation on an association which is shutting
  * down.
  */
-sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
-					  const struct sctp_endpoint *ep,
-					  const struct sctp_association *asoc,
-					  const union sctp_subtype type,
-					  void *arg,
-					  struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_error_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR,
 			SCTP_ERROR(-ESHUTDOWN));
@@ -4980,13 +5011,13 @@ sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_cookie_wait_prm_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
@@ -5015,12 +5046,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg, struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_prm_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
@@ -5042,13 +5074,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_cookie_wait_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *abort = arg;
 
@@ -5091,13 +5123,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
@@ -5117,13 +5149,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_shutdown_pending_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* Stop the T5-shutdown guard timer.  */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5144,13 +5176,13 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_shutdown_sent_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* Stop the T2-shutdown timer.  */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5175,13 +5207,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_shutdown_ack_sent_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* The same T2 timer, so we should be able to use
 	 * common function with the SHUTDOWN-SENT state.
@@ -5211,7 +5243,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
  * o destination transport address - the transport address of the
  *   association on which a heartbeat should be issued.
  */
-sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
+enum sctp_disposition sctp_sf_do_prm_requestheartbeat(
 					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
@@ -5244,12 +5276,12 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
  * When an endpoint has an ASCONF signaled change to be sent to the
  * remote endpoint it should do A1 to A9
  */
-sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_prm_asconf(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -5261,12 +5293,12 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
 }
 
 /* RE-CONFIG Section 5.1 RECONF Chunk Procedures */
-sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
-					 const struct sctp_endpoint *ep,
-					 const struct sctp_association *asoc,
-					 const union sctp_subtype type,
-					 void *arg,
-					 struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_prm_reconf(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -5279,13 +5311,13 @@ sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
  *
  * The return value is the disposition of the primitive.
  */
-sctp_disposition_t sctp_sf_ignore_primitive(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_ignore_primitive(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	pr_debug("%s: primitive type:%d is ignored\n", __func__,
 		 type.primitive);
@@ -5303,13 +5335,13 @@ sctp_disposition_t sctp_sf_ignore_primitive(
  * subscribes to this event, if there is no data to be sent or
  * retransmit, the stack will immediately send up this notification.
  */
-sctp_disposition_t sctp_sf_do_no_pending_tsn(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_no_pending_tsn(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_ulpevent *event;
 
@@ -5335,13 +5367,13 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_9_2_start_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *reply;
 
@@ -5405,15 +5437,15 @@ nomem:
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_9_2_shutdown_ack(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
 
 	/* There are 2 ways of getting here:
@@ -5479,12 +5511,12 @@ nomem:
  *
  * The return value is the disposition of the event.
  */
-sctp_disposition_t sctp_sf_ignore_other(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_ignore_other(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	pr_debug("%s: the event other type:%d is ignored\n",
 		 __func__, type.other);
@@ -5507,12 +5539,12 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_6_3_3_rtx(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = arg;
 
@@ -5595,12 +5627,12 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
  * allow. However, an SCTP transmitter MUST NOT be more aggressive than
  * the following algorithms allow.
  */
-sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const union sctp_subtype type,
-				       void *arg,
-				       struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_do_6_2_sack(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
 	SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
@@ -5626,16 +5658,17 @@ sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
  * (timers, events)
  *
  */
-sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const union sctp_subtype type,
-					   void *arg,
-					   struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_t1_init_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
+	int attempts = asoc->init_err_counter + 1;
 	struct sctp_chunk *repl = NULL;
 	struct sctp_bind_addr *bp;
-	int attempts = asoc->init_err_counter + 1;
 
 	pr_debug("%s: timer T1 expired (INIT)\n", __func__);
 
@@ -5690,15 +5723,16 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
  * (timers, events)
  *
  */
-sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const union sctp_subtype type,
-					   void *arg,
-					   struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_t1_cookie_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *repl = NULL;
 	int attempts = asoc->init_err_counter + 1;
+	struct sctp_chunk *repl = NULL;
 
 	pr_debug("%s: timer T1 expired (COOKIE-ECHO)\n", __func__);
 
@@ -5740,12 +5774,13 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
  * the T2-Shutdown timer,  giving its peer ample opportunity to transmit
  * all of its queued DATA chunks that have not yet been sent.
  */
-sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const union sctp_subtype type,
-					   void *arg,
-					   struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_t2_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *reply = NULL;
 
@@ -5810,13 +5845,13 @@ nomem:
  * ADDIP Section 4.1 ASCONF CHunk Procedures
  * If the T4 RTO timer expires the endpoint should do B1 to B5
  */
-sctp_disposition_t sctp_sf_t4_timer_expire(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_t4_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = asoc->addip_last_asconf;
 	struct sctp_transport *transport = chunk->transport;
@@ -5882,12 +5917,13 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
  * At the expiration of this timer the sender SHOULD abort the association
  * by sending an ABORT chunk.
  */
-sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const union sctp_subtype type,
-					   void *arg,
-					   struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_t5_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *reply = NULL;
 
@@ -5918,15 +5954,15 @@ nomem:
  * The work that needs to be done is same as when SHUTDOWN is initiated by
  * the user.  So this routine looks same as sctp_sf_do_9_2_prm_shutdown().
  */
-sctp_disposition_t sctp_sf_autoclose_timer_expire(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const union sctp_subtype type,
-	void *arg,
-	struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_autoclose_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	int disposition;
+	enum sctp_disposition disposition;
 
 	SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS);
 
@@ -5946,6 +5982,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 		disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
 							    arg, commands);
 	}
+
 	return disposition;
 }
 
@@ -5961,12 +5998,11 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_not_impl(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const union sctp_subtype type,
-				    void *arg,
-				    struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_not_impl(struct net *net,
+				       const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const union sctp_subtype type,
+				       void *arg, struct sctp_cmd_seq *commands)
 {
 	return SCTP_DISPOSITION_NOT_IMPL;
 }
@@ -5979,12 +6015,11 @@ sctp_disposition_t sctp_sf_not_impl(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_bug(struct net *net,
-			       const struct sctp_endpoint *ep,
-			       const struct sctp_association *asoc,
-			       const union sctp_subtype type,
-			       void *arg,
-			       struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_bug(struct net *net,
+				  const struct sctp_endpoint *ep,
+				  const struct sctp_association *asoc,
+				  const union sctp_subtype type,
+				  void *arg, struct sctp_cmd_seq *commands)
 {
 	return SCTP_DISPOSITION_BUG;
 }
@@ -6000,12 +6035,12 @@ sctp_disposition_t sctp_sf_bug(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const union sctp_subtype type,
-					void *arg,
-					struct sctp_cmd_seq *commands)
+enum sctp_disposition sctp_sf_timer_ignore(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	pr_debug("%s: timer %d ignored\n", __func__, type.chunk);
 
@@ -6020,9 +6055,9 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
 static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
 {
 	struct sctp_sackhdr *sack;
+	__u16 num_dup_tsns;
 	unsigned int len;
 	__u16 num_blocks;
-	__u16 num_dup_tsns;
 
 	/* Protect ourselves from reading too far into
 	 * the skb from a bogus sender.
@@ -6044,12 +6079,12 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
 /* Create an ABORT packet to be sent as a response, with the specified
  * error causes.
  */
-static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
-				  const struct sctp_endpoint *ep,
-				  const struct sctp_association *asoc,
-				  struct sctp_chunk *chunk,
-				  const void *payload,
-				  size_t paylen)
+static struct sctp_packet *sctp_abort_pkt_new(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk,
+					const void *payload, size_t paylen)
 {
 	struct sctp_packet *packet;
 	struct sctp_chunk *abort;
@@ -6086,14 +6121,14 @@ static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
 }
 
 /* Allocate a packet for responding in the OOTB conditions.  */
-static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
-					     const struct sctp_association *asoc,
-					     const struct sctp_chunk *chunk)
+static struct sctp_packet *sctp_ootb_pkt_new(
+					struct net *net,
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk)
 {
-	struct sctp_packet *packet;
 	struct sctp_transport *transport;
-	__u16 sport;
-	__u16 dport;
+	struct sctp_packet *packet;
+	__u16 sport, dport;
 	__u32 vtag;
 
 	/* Get the source and destination port from the inbound packet.  */
@@ -6202,18 +6237,17 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 			 struct sctp_chunk *chunk,
 			 struct sctp_cmd_seq *commands)
 {
-	struct sctp_datahdr *data_hdr;
-	struct sctp_chunk *err;
-	size_t datalen;
-	enum sctp_verb deliver;
-	int tmp;
-	__u32 tsn;
 	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
-	u16 ssn;
-	u16 sid;
+	struct sctp_datahdr *data_hdr;
+	struct sctp_chunk *err;
+	enum sctp_verb deliver;
+	size_t datalen;
 	u8 ordered = 0;
+	u16 ssn, sid;
+	__u32 tsn;
+	int tmp;
 
 	data_hdr = (struct sctp_datahdr *)chunk->skb->data;
 	chunk->subh.data_hdr = data_hdr;
-- 
cgit v1.2.3


From 327c0dab8d1301cd866816de8c7f32eb872cabac Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 11 Aug 2017 10:23:58 +0800
Subject: sctp: fix some indents in sm_make_chunk.c

There are some bad indents of functions' defination in sm_make_chunk.c.
They have been there since beginning, it was probably caused by that
the typedef sctp_chunk_t was replaced with struct sctp_chunk.

So it's the best time to fix them in this patchset, it's also to fix
some bad indents in other functions' defination in sm_make_chunk.c.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h    | 158 ++++++++++-----------
 net/sctp/sm_make_chunk.c | 348 +++++++++++++++++++++++------------------------
 2 files changed, 249 insertions(+), 257 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 33077f317995..2db3d3a9ce1d 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -184,68 +184,69 @@ __u32 sctp_generate_verification_tag(void);
 void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag);
 
 /* Prototypes for chunk-building functions.  */
-struct sctp_chunk *sctp_make_init(const struct sctp_association *,
-			     const struct sctp_bind_addr *,
-			     gfp_t gfp, int vparam_len);
-struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *,
-				 const struct sctp_chunk *,
-				 const gfp_t gfp,
-				 const int unkparam_len);
-struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *,
-				    const struct sctp_chunk *);
-struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *,
-				   const struct sctp_chunk *);
-struct sctp_chunk *sctp_make_cwr(const struct sctp_association *,
+struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
+				  const struct sctp_bind_addr *bp,
+				  gfp_t gfp, int vparam_len);
+struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
+				      const struct sctp_chunk *chunk,
+				      const gfp_t gfp, const int unkparam_len);
+struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
+					 const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
 				 const __u32 lowest_tsn,
-				 const struct sctp_chunk *);
-struct sctp_chunk * sctp_make_datafrag_empty(struct sctp_association *,
-					const struct sctp_sndrcvinfo *sinfo,
-					int len, const __u8 flags,
-					__u16 ssn, gfp_t gfp);
-struct sctp_chunk *sctp_make_ecne(const struct sctp_association *,
-				  const __u32);
-struct sctp_chunk *sctp_make_sack(const struct sctp_association *);
+				 const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
+					    const struct sctp_sndrcvinfo *sinfo,
+					    int len, const __u8 flags,
+					    __u16 ssn, gfp_t gfp);
+struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
+				  const __u32 lowest_tsn);
+struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc);
 struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
 				      const struct sctp_chunk *chunk);
 struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
-					  const struct sctp_chunk *);
-struct sctp_chunk *sctp_make_shutdown_complete(const struct sctp_association *,
-					  const struct sctp_chunk *);
-void sctp_init_cause(struct sctp_chunk *, __be16 cause, size_t);
-struct sctp_chunk *sctp_make_abort(const struct sctp_association *,
-			      const struct sctp_chunk *,
-			      const size_t hint);
-struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *,
-				      const struct sctp_chunk *,
-				      __u32 tsn);
-struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *,
-					struct msghdr *, size_t msg_len);
-struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *,
-				   const struct sctp_chunk *,
-				   const __u8 *,
-				   const size_t );
-struct sctp_chunk *sctp_make_violation_paramlen(const struct sctp_association *,
-				   const struct sctp_chunk *,
-				   struct sctp_paramhdr *);
-struct sctp_chunk *sctp_make_violation_max_retrans(const struct sctp_association *,
-						   const struct sctp_chunk *);
-struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *,
-				  const struct sctp_transport *);
-struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *,
-				      const struct sctp_chunk *,
-				      const void *payload,
-				      const size_t paylen);
-struct sctp_chunk *sctp_make_op_error(const struct sctp_association *,
-				 const struct sctp_chunk *chunk,
-				 __be16 cause_code,
-				 const void *payload,
-				 size_t paylen,
-				 size_t reserve_tail);
-
-struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *,
-					      union sctp_addr *,
-					      struct sockaddr *,
-					      int, __be16);
+					  const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_shutdown_complete(
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk);
+void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen);
+struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
+				   const struct sctp_chunk *chunk,
+				   const size_t hint);
+struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *asoc,
+					   const struct sctp_chunk *chunk,
+					   __u32 tsn);
+struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
+					struct msghdr *msg, size_t msg_len);
+struct sctp_chunk *sctp_make_abort_violation(
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					const __u8 *payload,
+					const size_t paylen);
+struct sctp_chunk *sctp_make_violation_paramlen(
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					struct sctp_paramhdr *param);
+struct sctp_chunk *sctp_make_violation_max_retrans(
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
+				       const struct sctp_transport *transport);
+struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
+					   const struct sctp_chunk *chunk,
+					   const void *payload,
+					   const size_t paylen);
+struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
+				      const struct sctp_chunk *chunk,
+				      __be16 cause_code, const void *payload,
+				      size_t paylen, size_t reserve_tail);
+
+struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
+					      union sctp_addr *laddr,
+					      struct sockaddr *addrs,
+					      int addrcnt, __be16 flags);
 struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
 					     union sctp_addr *addr);
 bool sctp_verify_asconf(const struct sctp_association *asoc,
@@ -259,27 +260,25 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
 				    __u32 new_cum_tsn, size_t nstreams,
 				    struct sctp_fwdtsn_skip *skiplist);
 struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc);
-struct sctp_chunk *sctp_make_strreset_req(
-				const struct sctp_association *asoc,
-				__u16 stream_num, __u16 *stream_list,
-				bool out, bool in);
+struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc,
+					  __u16 stream_num, __u16 *stream_list,
+					  bool out, bool in);
 struct sctp_chunk *sctp_make_strreset_tsnreq(
-				const struct sctp_association *asoc);
+					const struct sctp_association *asoc);
 struct sctp_chunk *sctp_make_strreset_addstrm(
-				const struct sctp_association *asoc,
-				__u16 out, __u16 in);
-struct sctp_chunk *sctp_make_strreset_resp(
-				const struct sctp_association *asoc,
-				__u32 result, __u32 sn);
-struct sctp_chunk *sctp_make_strreset_tsnresp(
-				struct sctp_association *asoc,
-				__u32 result, __u32 sn,
-				__u32 sender_tsn, __u32 receiver_tsn);
+					const struct sctp_association *asoc,
+					__u16 out, __u16 in);
+struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc,
+					   __u32 result, __u32 sn);
+struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc,
+					      __u32 result, __u32 sn,
+					      __u32 sender_tsn,
+					      __u32 receiver_tsn);
 bool sctp_verify_reconf(const struct sctp_association *asoc,
 			struct sctp_chunk *chunk,
 			struct sctp_paramhdr **errp);
-void sctp_chunk_assign_tsn(struct sctp_chunk *);
-void sctp_chunk_assign_ssn(struct sctp_chunk *);
+void sctp_chunk_assign_tsn(struct sctp_chunk *chunk);
+void sctp_chunk_assign_ssn(struct sctp_chunk *chunk);
 
 /* Prototypes for stream-processing functions.  */
 struct sctp_chunk *sctp_process_strreset_outreq(
@@ -322,11 +321,12 @@ void sctp_generate_proto_unreach_event(unsigned long peer);
 
 void sctp_ootb_pkt_free(struct sctp_packet *packet);
 
-struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       struct sctp_chunk *chunk,
-				       gfp_t gfp, int *err,
-				       struct sctp_chunk **err_chk_p);
+struct sctp_association *sctp_unpack_cookie(
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk,
+					gfp_t gfp, int *err,
+					struct sctp_chunk **err_chk_p);
 
 /* 3rd level prototypes */
 __u32 sctp_generate_tag(const struct sctp_endpoint *ep);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 51de638a88b2..ca8f196b6c6c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -132,8 +132,8 @@ static const struct sctp_paramhdr prsctp_param = {
  * provided chunk, as most cause codes will be embedded inside an
  * abort chunk.
  */
-void  sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
-		      size_t paylen)
+void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
+		     size_t paylen)
 {
 	struct sctp_errhdr err;
 	__u16 len;
@@ -151,7 +151,7 @@ void  sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
  * if there isn't enough space in the op error chunk
  */
 static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
-		      size_t paylen)
+				 size_t paylen)
 {
 	struct sctp_errhdr err;
 	__u16 len;
@@ -213,32 +213,31 @@ static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
  * Supported Address Types (Note 4)    Optional    12
  */
 struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
-			     const struct sctp_bind_addr *bp,
-			     gfp_t gfp, int vparam_len)
+				  const struct sctp_bind_addr *bp,
+				  gfp_t gfp, int vparam_len)
 {
 	struct net *net = sock_net(asoc->base.sk);
+	struct sctp_supported_ext_param ext_param;
+	struct sctp_adaptation_ind_param aiparam;
+	struct sctp_paramhdr *auth_chunks = NULL;
+	struct sctp_paramhdr *auth_hmacs = NULL;
+	struct sctp_supported_addrs_param sat;
 	struct sctp_endpoint *ep = asoc->ep;
-	struct sctp_inithdr init;
-	union sctp_params addrs;
-	size_t chunksize;
 	struct sctp_chunk *retval = NULL;
 	int num_types, addrs_len = 0;
+	struct sctp_inithdr init;
+	union sctp_params addrs;
 	struct sctp_sock *sp;
-	struct sctp_supported_addrs_param sat;
+	__u8 extensions[4];
+	size_t chunksize;
 	__be16 types[2];
-	struct sctp_adaptation_ind_param aiparam;
-	struct sctp_supported_ext_param ext_param;
 	int num_ext = 0;
-	__u8 extensions[4];
-	struct sctp_paramhdr *auth_chunks = NULL,
-			*auth_hmacs = NULL;
 
 	/* RFC 2960 3.3.2 Initiation (INIT) (1)
 	 *
 	 * Note 1: The INIT chunks can contain multiple addresses that
 	 * can be IPv4 and/or IPv6 in any combination.
 	 */
-	retval = NULL;
 
 	/* Convert the provided bind address list to raw format. */
 	addrs = sctp_bind_addrs_to_raw(bp, &addrs_len, gfp);
@@ -380,26 +379,24 @@ nodata:
 }
 
 struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
-				 const struct sctp_chunk *chunk,
-				 gfp_t gfp, int unkparam_len)
+				      const struct sctp_chunk *chunk,
+				      gfp_t gfp, int unkparam_len)
 {
+	struct sctp_supported_ext_param ext_param;
+	struct sctp_adaptation_ind_param aiparam;
+	struct sctp_paramhdr *auth_chunks = NULL;
+	struct sctp_paramhdr *auth_random = NULL;
+	struct sctp_paramhdr *auth_hmacs = NULL;
+	struct sctp_chunk *retval = NULL;
+	struct sctp_cookie_param *cookie;
 	struct sctp_inithdr initack;
-	struct sctp_chunk *retval;
 	union sctp_params addrs;
 	struct sctp_sock *sp;
-	int addrs_len;
-	struct sctp_cookie_param *cookie;
-	int cookie_len;
+	__u8 extensions[4];
 	size_t chunksize;
-	struct sctp_adaptation_ind_param aiparam;
-	struct sctp_supported_ext_param ext_param;
 	int num_ext = 0;
-	__u8 extensions[4];
-	struct sctp_paramhdr *auth_chunks = NULL,
-			*auth_hmacs = NULL,
-			*auth_random = NULL;
-
-	retval = NULL;
+	int cookie_len;
+	int addrs_len;
 
 	/* Note: there may be no addresses to embed. */
 	addrs = sctp_bind_addrs_to_raw(&asoc->base.bind_addr, &addrs_len, gfp);
@@ -562,11 +559,11 @@ nomem_cookie:
  *   to insure interoperability.
  */
 struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
-				    const struct sctp_chunk *chunk)
+					 const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
-	void *cookie;
 	int cookie_len;
+	void *cookie;
 
 	cookie = asoc->peer.cookie;
 	cookie_len = asoc->peer.cookie_len;
@@ -614,7 +611,7 @@ nodata:
  *   Set to zero on transmit and ignored on receipt.
  */
 struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
-				   const struct sctp_chunk *chunk)
+					const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
 
@@ -659,8 +656,8 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
  *     Note: The CWR is considered a Control chunk.
  */
 struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
-			    const __u32 lowest_tsn,
-			    const struct sctp_chunk *chunk)
+				 const __u32 lowest_tsn,
+				 const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
 	struct sctp_cwrhdr cwr;
@@ -694,7 +691,7 @@ nodata:
 
 /* Make an ECNE chunk.  This is a congestion experienced report.  */
 struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
-			     const __u32 lowest_tsn)
+				  const __u32 lowest_tsn)
 {
 	struct sctp_chunk *retval;
 	struct sctp_ecnehdr ecne;
@@ -715,9 +712,9 @@ nodata:
  * parameters.  However, do not populate the data payload.
  */
 struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
-				       const struct sctp_sndrcvinfo *sinfo,
-				       int data_len, __u8 flags, __u16 ssn,
-				       gfp_t gfp)
+					    const struct sctp_sndrcvinfo *sinfo,
+					    int data_len, __u8 flags, __u16 ssn,
+					    gfp_t gfp)
 {
 	struct sctp_chunk *retval;
 	struct sctp_datahdr dp;
@@ -755,15 +752,15 @@ nodata:
  */
 struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
 {
-	struct sctp_chunk *retval;
-	struct sctp_sackhdr sack;
-	int len;
-	__u32 ctsn;
-	__u16 num_gabs, num_dup_tsns;
-	struct sctp_association *aptr = (struct sctp_association *)asoc;
 	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
+	struct sctp_association *aptr = (struct sctp_association *)asoc;
 	struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
+	__u16 num_gabs, num_dup_tsns;
 	struct sctp_transport *trans;
+	struct sctp_chunk *retval;
+	struct sctp_sackhdr sack;
+	__u32 ctsn;
+	int len;
 
 	memset(gabs, 0, sizeof(gabs));
 	ctsn = sctp_tsnmap_get_ctsn(map);
@@ -879,7 +876,7 @@ nodata:
 }
 
 struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
-				     const struct sctp_chunk *chunk)
+					  const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
 
@@ -902,8 +899,8 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
 }
 
 struct sctp_chunk *sctp_make_shutdown_complete(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
 	__u8 flags = 0;
@@ -936,8 +933,8 @@ struct sctp_chunk *sctp_make_shutdown_complete(
  * association, except when responding to an INIT (sctpimpguide 2.41).
  */
 struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
-			      const struct sctp_chunk *chunk,
-			      const size_t hint)
+				   const struct sctp_chunk *chunk,
+				   const size_t hint)
 {
 	struct sctp_chunk *retval;
 	__u8 flags = 0;
@@ -973,8 +970,9 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
 
 /* Helper to create ABORT with a NO_USER_DATA error.  */
 struct sctp_chunk *sctp_make_abort_no_data(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk, __u32 tsn)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					__u32 tsn)
 {
 	struct sctp_chunk *retval;
 	__be32 payload;
@@ -1054,8 +1052,8 @@ err_chunk:
 static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
 			      const void *data)
 {
-	void *target;
 	int chunklen = ntohs(chunk->chunk_hdr->length);
+	void *target;
 
 	target = skb_put(chunk->skb, len);
 
@@ -1073,10 +1071,10 @@ static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
 
 /* Make an ABORT chunk with a PROTOCOL VIOLATION cause code. */
 struct sctp_chunk *sctp_make_abort_violation(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk,
-	const __u8   *payload,
-	const size_t paylen)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					const __u8 *payload,
+					const size_t paylen)
 {
 	struct sctp_chunk  *retval;
 	struct sctp_paramhdr phdr;
@@ -1099,14 +1097,14 @@ end:
 }
 
 struct sctp_chunk *sctp_make_violation_paramlen(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk,
-	struct sctp_paramhdr *param)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					struct sctp_paramhdr *param)
 {
-	struct sctp_chunk *retval;
 	static const char error[] = "The following parameter had invalid length:";
 	size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr) +
 			     sizeof(*param);
+	struct sctp_chunk *retval;
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
 	if (!retval)
@@ -1122,12 +1120,12 @@ nodata:
 }
 
 struct sctp_chunk *sctp_make_violation_max_retrans(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk)
 {
-	struct sctp_chunk *retval;
 	static const char error[] = "Association exceeded its max_retans count";
 	size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr);
+	struct sctp_chunk *retval;
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
 	if (!retval)
@@ -1171,8 +1169,9 @@ nodata:
 }
 
 struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
-				      const struct sctp_chunk *chunk,
-				      const void *payload, const size_t paylen)
+					   const struct sctp_chunk *chunk,
+					   const void *payload,
+					   const size_t paylen)
 {
 	struct sctp_chunk *retval;
 
@@ -1203,9 +1202,9 @@ nodata:
  * This routine can be used for containing multiple causes in the chunk.
  */
 static struct sctp_chunk *sctp_make_op_error_space(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk,
-	size_t size)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					size_t size)
 {
 	struct sctp_chunk *retval;
 
@@ -1237,8 +1236,8 @@ nodata:
  * to report all the errors, if the incoming chunk is large
  */
 static inline struct sctp_chunk *sctp_make_op_error_fixed(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk)
 {
 	size_t size = asoc ? asoc->pathmtu : 0;
 
@@ -1250,9 +1249,9 @@ static inline struct sctp_chunk *sctp_make_op_error_fixed(
 
 /* Create an Operation Error chunk.  */
 struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
-				 const struct sctp_chunk *chunk,
-				 __be16 cause_code, const void *payload,
-				 size_t paylen, size_t reserve_tail)
+				      const struct sctp_chunk *chunk,
+				      __be16 cause_code, const void *payload,
+				      size_t paylen, size_t reserve_tail)
 {
 	struct sctp_chunk *retval;
 
@@ -1271,9 +1270,9 @@ nodata:
 
 struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
 {
-	struct sctp_chunk *retval;
-	struct sctp_hmac *hmac_desc;
 	struct sctp_authhdr auth_hdr;
+	struct sctp_hmac *hmac_desc;
+	struct sctp_chunk *retval;
 	__u8 *hmac;
 
 	/* Get the first hmac that the peer told us to use */
@@ -1319,8 +1318,8 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
  *
  */
 struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
-			    const struct sctp_association *asoc,
-			    struct sock *sk, gfp_t gfp)
+				 const struct sctp_association *asoc,
+				 struct sock *sk, gfp_t gfp)
 {
 	struct sctp_chunk *retval;
 
@@ -1372,11 +1371,11 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
  * arguments, reserving enough space for a 'paylen' byte payload.
  */
 static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
-					    __u8 type, __u8 flags, int paylen,
-					    gfp_t gfp)
+					   __u8 type, __u8 flags, int paylen,
+					   gfp_t gfp)
 {
-	struct sctp_chunk *retval;
 	struct sctp_chunkhdr *chunk_hdr;
+	struct sctp_chunk *retval;
 	struct sk_buff *skb;
 	struct sock *sk;
 
@@ -1470,9 +1469,9 @@ void sctp_chunk_put(struct sctp_chunk *ch)
  */
 void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
 {
-	void *target;
 	int chunklen = ntohs(chunk->chunk_hdr->length);
 	int padlen = SCTP_PAD4(chunklen) - chunklen;
+	void *target;
 
 	skb_put_zero(chunk->skb, padlen);
 	target = skb_put_data(chunk->skb, data, len);
@@ -1525,11 +1524,10 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len,
  */
 void sctp_chunk_assign_ssn(struct sctp_chunk *chunk)
 {
-	struct sctp_datamsg *msg;
-	struct sctp_chunk *lchunk;
 	struct sctp_stream *stream;
-	__u16 ssn;
-	__u16 sid;
+	struct sctp_chunk *lchunk;
+	struct sctp_datamsg *msg;
+	__u16 ssn, sid;
 
 	if (chunk->has_ssn)
 		return;
@@ -1574,8 +1572,8 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk)
 
 /* Create a CLOSED association to use with an incoming packet.  */
 struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
-					struct sctp_chunk *chunk,
-					gfp_t gfp)
+					     struct sctp_chunk *chunk,
+					     gfp_t gfp)
 {
 	struct sctp_association *asoc;
 	enum sctp_scope scope;
@@ -1602,8 +1600,8 @@ static struct sctp_cookie_param *sctp_pack_cookie(
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const struct sctp_chunk *init_chunk,
-					int *cookie_len,
-					const __u8 *raw_addrs, int addrs_len)
+					int *cookie_len, const __u8 *raw_addrs,
+					int addrs_len)
 {
 	struct sctp_signed_cookie *cookie;
 	struct sctp_cookie_param *retval;
@@ -1690,19 +1688,19 @@ nodata:
 
 /* Unpack the cookie from COOKIE ECHO chunk, recreating the association.  */
 struct sctp_association *sctp_unpack_cookie(
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	struct sctp_chunk *chunk, gfp_t gfp,
-	int *error, struct sctp_chunk **errp)
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk, gfp_t gfp,
+					int *error, struct sctp_chunk **errp)
 {
 	struct sctp_association *retval = NULL;
+	int headersize, bodysize, fixed_size;
 	struct sctp_signed_cookie *cookie;
+	struct sk_buff *skb = chunk->skb;
 	struct sctp_cookie *bear_cookie;
-	int headersize, bodysize, fixed_size;
 	__u8 *digest = ep->digest;
-	unsigned int len;
 	enum sctp_scope scope;
-	struct sk_buff *skb = chunk->skb;
+	unsigned int len;
 	ktime_t kt;
 
 	/* Header size is static data prior to the actual cookie, including
@@ -1974,8 +1972,8 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
 static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 {
 	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
-	int have_auth = 0;
 	int have_asconf = 0;
+	int have_auth = 0;
 	int i;
 
 	for (i = 0; i < num_ext; i++) {
@@ -2005,10 +2003,10 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 }
 
 static void sctp_process_ext_param(struct sctp_association *asoc,
-				    union sctp_params param)
+				   union sctp_params param)
 {
-	struct net *net = sock_net(asoc->base.sk);
 	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
+	struct net *net = sock_net(asoc->base.sk);
 	int i;
 
 	for (i = 0; i < num_ext; i++) {
@@ -2309,13 +2307,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 		      struct sctp_init_chunk *peer_init, gfp_t gfp)
 {
 	struct net *net = sock_net(asoc->base.sk);
-	union sctp_params param;
 	struct sctp_transport *transport;
 	struct list_head *pos, *temp;
-	struct sctp_af *af;
+	union sctp_params param;
 	union sctp_addr addr;
-	char *cookie;
+	struct sctp_af *af;
 	int src_match = 0;
+	char *cookie;
 
 	/* We must include the address that the INIT packet came from.
 	 * This is the only address that matters for an INIT packet.
@@ -2499,16 +2497,15 @@ static int sctp_process_param(struct sctp_association *asoc,
 			      gfp_t gfp)
 {
 	struct net *net = sock_net(asoc->base.sk);
-	union sctp_addr addr;
-	int i;
-	__u16 sat;
-	int retval = 1;
-	enum sctp_scope scope;
-	u32 stale;
-	struct sctp_af *af;
+	struct sctp_endpoint *ep = asoc->ep;
 	union sctp_addr_param *addr_param;
 	struct sctp_transport *t;
-	struct sctp_endpoint *ep = asoc->ep;
+	enum sctp_scope scope;
+	union sctp_addr addr;
+	struct sctp_af *af;
+	int retval = 1, i;
+	u32 stale;
+	__u16 sat;
 
 	/* We maintain all INIT parameters in network byte order all the
 	 * time.  This allows us to not worry about whether the parameters
@@ -2806,22 +2803,20 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
  *
  */
 struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
-					      union sctp_addr	      *laddr,
-					      struct sockaddr	      *addrs,
-					      int		      addrcnt,
-					      __be16		      flags)
+					      union sctp_addr *laddr,
+					      struct sockaddr *addrs,
+					      int addrcnt, __be16 flags)
 {
+	union sctp_addr_param addr_param;
 	struct sctp_addip_param	param;
-	struct sctp_chunk	*retval;
-	union sctp_addr_param	addr_param;
-	union sctp_addr		*addr;
-	void			*addr_buf;
-	struct sctp_af		*af;
-	int			paramlen = sizeof(param);
-	int			addr_param_len = 0;
-	int 			totallen = 0;
-	int 			i;
-	int			del_pickup = 0;
+	int paramlen = sizeof(param);
+	struct sctp_chunk *retval;
+	int addr_param_len = 0;
+	union sctp_addr *addr;
+	int totallen = 0, i;
+	int del_pickup = 0;
+	struct sctp_af *af;
+	void *addr_buf;
 
 	/* Get total length of all the address parameters. */
 	addr_buf = addrs;
@@ -2897,12 +2892,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
 struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
 					     union sctp_addr *addr)
 {
+	struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family);
+	union sctp_addr_param addrparam;
 	struct sctp_addip_param	param;
-	struct sctp_chunk 	*retval;
-	int 			len = sizeof(param);
-	union sctp_addr_param	addrparam;
-	int			addrlen;
-	struct sctp_af		*af = sctp_get_af_specific(addr->v4.sin_family);
+	struct sctp_chunk *retval;
+	int len = sizeof(param);
+	int addrlen;
 
 	addrlen = af->to_addr_param(addr, &addrparam);
 	if (!addrlen)
@@ -2946,9 +2941,9 @@ struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
 static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *asoc,
 					       __u32 serial, int vparam_len)
 {
-	struct sctp_addiphdr	asconf;
-	struct sctp_chunk	*retval;
-	int			length = sizeof(asconf) + vparam_len;
+	struct sctp_addiphdr asconf;
+	struct sctp_chunk *retval;
+	int length = sizeof(asconf) + vparam_len;
 
 	/* Create the chunk.  */
 	retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length,
@@ -2970,10 +2965,10 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
 				     struct sctp_addip_param *asconf_param)
 {
 	struct sctp_addip_param ack_param;
-	struct sctp_errhdr	err_param;
-	int			asconf_param_len = 0;
-	int			err_param_len = 0;
-	__be16			response_type;
+	struct sctp_errhdr err_param;
+	int asconf_param_len = 0;
+	int err_param_len = 0;
+	__be16 response_type;
 
 	if (SCTP_ERROR_NO_ERROR == err_code) {
 		response_type = SCTP_PARAM_SUCCESS_REPORT;
@@ -3011,10 +3006,10 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 					struct sctp_chunk *asconf,
 					struct sctp_addip_param *asconf_param)
 {
+	union sctp_addr_param *addr_param;
 	struct sctp_transport *peer;
-	struct sctp_af *af;
 	union sctp_addr	addr;
-	union sctp_addr_param *addr_param;
+	struct sctp_af *af;
 
 	addr_param = (void *)asconf_param + sizeof(*asconf_param);
 
@@ -3142,8 +3137,8 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 			struct sctp_paramhdr **errp)
 {
 	struct sctp_addip_chunk *addip;
-	union sctp_params param;
 	bool addr_param_seen = false;
+	union sctp_params param;
 
 	addip = (struct sctp_addip_chunk *)chunk->chunk_hdr;
 	sctp_walk_params(param, addip, addip_hdr.params) {
@@ -3209,16 +3204,15 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 				       struct sctp_chunk *asconf)
 {
+	union sctp_addr_param *addr_param;
 	struct sctp_addip_chunk *addip;
+	struct sctp_chunk *asconf_ack;
 	bool all_param_pass = true;
+	struct sctp_addiphdr *hdr;
+	int length = 0, chunk_len;
 	union sctp_params param;
-	struct sctp_addiphdr	*hdr;
-	union sctp_addr_param	*addr_param;
-	struct sctp_chunk	*asconf_ack;
-	__be16	err_code;
-	int	length = 0;
-	int	chunk_len;
-	__u32	serial;
+	__be16 err_code;
+	__u32 serial;
 
 	addip = (struct sctp_addip_chunk *)asconf->chunk_hdr;
 	chunk_len = ntohs(asconf->chunk_hdr->length) -
@@ -3295,12 +3289,12 @@ done:
 static void sctp_asconf_param_success(struct sctp_association *asoc,
 				      struct sctp_addip_param *asconf_param)
 {
-	struct sctp_af *af;
-	union sctp_addr	addr;
 	struct sctp_bind_addr *bp = &asoc->base.bind_addr;
 	union sctp_addr_param *addr_param;
-	struct sctp_transport *transport;
 	struct sctp_sockaddr_entry *saddr;
+	struct sctp_transport *transport;
+	union sctp_addr	addr;
+	struct sctp_af *af;
 
 	addr_param = (void *)asconf_param + sizeof(*asconf_param);
 
@@ -3357,10 +3351,10 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 				       int no_err)
 {
 	struct sctp_addip_param	*asconf_ack_param;
-	struct sctp_errhdr	*err_param;
-	int			length;
-	int			asconf_ack_len;
-	__be16			err_code;
+	struct sctp_errhdr *err_param;
+	int asconf_ack_len;
+	__be16 err_code;
+	int length;
 
 	if (no_err)
 		err_code = SCTP_ERROR_NO_ERROR;
@@ -3409,15 +3403,15 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 int sctp_process_asconf_ack(struct sctp_association *asoc,
 			    struct sctp_chunk *asconf_ack)
 {
-	struct sctp_chunk	*asconf = asoc->addip_last_asconf;
-	union sctp_addr_param	*addr_param;
-	struct sctp_addip_param	*asconf_param;
-	int	length = 0;
-	int	asconf_len = asconf->skb->len;
-	int	all_param_pass = 0;
-	int	no_err = 1;
-	int	retval = 0;
-	__be16	err_code = SCTP_ERROR_NO_ERROR;
+	struct sctp_chunk *asconf = asoc->addip_last_asconf;
+	struct sctp_addip_param *asconf_param;
+	__be16 err_code = SCTP_ERROR_NO_ERROR;
+	union sctp_addr_param *addr_param;
+	int asconf_len = asconf->skb->len;
+	int all_param_pass = 0;
+	int length = 0;
+	int no_err = 1;
+	int retval = 0;
 
 	/* Skip the chunkhdr and addiphdr from the last asconf sent and store
 	 * a pointer to address parameter.
@@ -3544,9 +3538,8 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
  *  \                                                               \
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
-static struct sctp_chunk *sctp_make_reconf(
-				const struct sctp_association *asoc,
-				int length)
+static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc,
+					   int length)
 {
 	struct sctp_reconf_chunk *reconf;
 	struct sctp_chunk *retval;
@@ -3597,9 +3590,9 @@ static struct sctp_chunk *sctp_make_reconf(
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
 struct sctp_chunk *sctp_make_strreset_req(
-				const struct sctp_association *asoc,
-				__u16 stream_num, __u16 *stream_list,
-				bool out, bool in)
+					const struct sctp_association *asoc,
+					__u16 stream_num, __u16 *stream_list,
+					bool out, bool in)
 {
 	struct sctp_strreset_outreq outreq;
 	__u16 stream_len = stream_num * 2;
@@ -3651,7 +3644,7 @@ struct sctp_chunk *sctp_make_strreset_req(
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
 struct sctp_chunk *sctp_make_strreset_tsnreq(
-				const struct sctp_association *asoc)
+					const struct sctp_association *asoc)
 {
 	struct sctp_strreset_tsnreq tsnreq;
 	__u16 length = sizeof(tsnreq);
@@ -3682,8 +3675,8 @@ struct sctp_chunk *sctp_make_strreset_tsnreq(
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
 struct sctp_chunk *sctp_make_strreset_addstrm(
-				const struct sctp_association *asoc,
-				__u16 out, __u16 in)
+					const struct sctp_association *asoc,
+					__u16 out, __u16 in)
 {
 	struct sctp_strreset_addstrm addstrm;
 	__u16 size = sizeof(addstrm);
@@ -3727,9 +3720,8 @@ struct sctp_chunk *sctp_make_strreset_addstrm(
  *  |                            Result                             |
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
-struct sctp_chunk *sctp_make_strreset_resp(
-				const struct sctp_association *asoc,
-				__u32 result, __u32 sn)
+struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc,
+					   __u32 result, __u32 sn)
 {
 	struct sctp_strreset_resp resp;
 	__u16 length = sizeof(resp);
@@ -3764,10 +3756,10 @@ struct sctp_chunk *sctp_make_strreset_resp(
  *  |                  Receiver's Next TSN (optional)               |
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
-struct sctp_chunk *sctp_make_strreset_tsnresp(
-					struct sctp_association *asoc,
-					__u32 result, __u32 sn,
-					__u32 sender_tsn, __u32 receiver_tsn)
+struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc,
+					      __u32 result, __u32 sn,
+					      __u32 sender_tsn,
+					      __u32 receiver_tsn)
 {
 	struct sctp_strreset_resptsn tsnresp;
 	__u16 length = sizeof(tsnresp);
-- 
cgit v1.2.3


From 861932ecc36063196c80ca3e240502b0f6d0e977 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 9 Aug 2017 14:30:31 +0200
Subject: net: sched: Add helpers to identify classids

Offloading drivers need to understand what qdisc class a filter is added
to. Currently they only need to identify ingress, clsact->ingress and
clsact->egress. So provide these helpers.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 2579c209ea51..259bc191ba59 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -5,6 +5,7 @@
 #include <linux/ktime.h>
 #include <linux/if_vlan.h>
 #include <net/sch_generic.h>
+#include <uapi/linux/pkt_sched.h>
 
 #define DEFAULT_TX_QUEUE_LEN	1000
 
@@ -132,4 +133,17 @@ static inline unsigned int psched_mtu(const struct net_device *dev)
 	return dev->mtu + dev->hard_header_len;
 }
 
+static inline bool is_classid_clsact_ingress(u32 classid)
+{
+	/* This also returns true for ingress qdisc */
+	return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) &&
+	       TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS);
+}
+
+static inline bool is_classid_clsact_egress(u32 classid)
+{
+	return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) &&
+	       TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS);
+}
+
 #endif
-- 
cgit v1.2.3


From 7690f2a51d8afe51ac97e7fae66b081f192a7158 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 9 Aug 2017 14:30:32 +0200
Subject: net: sched: propagate classid down to offload drivers

Drivers need classid to decide they support this specific qdisc+class
or not. So propagate it down via the tc_cls_common_offload struct.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0f78e6560b2d..1f1de20e584f 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -410,6 +410,7 @@ struct tc_cls_common_offload {
 	u32 chain_index;
 	__be16 protocol;
 	u32 prio;
+	u32 classid;
 };
 
 static inline void
@@ -420,6 +421,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
 	cls_common->chain_index = tp->chain->index;
 	cls_common->protocol = tp->protocol;
 	cls_common->prio = tp->prio;
+	cls_common->classid = tp->classid;
 }
 
 struct tc_cls_u32_knode {
-- 
cgit v1.2.3


From 237f79d24ebe1eb9b5651b7342ba5cc9d9b8f222 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 9 Aug 2017 14:30:34 +0200
Subject: net: sched: remove handle propagation down to the drivers

There is no longer need to use handle in drivers, so remove it from
tc_cls_common_offload struct.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 1f1de20e584f..bd9dd79357fe 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -406,7 +406,6 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 #endif /* CONFIG_NET_CLS_IND */
 
 struct tc_cls_common_offload {
-	u32 handle;
 	u32 chain_index;
 	__be16 protocol;
 	u32 prio;
@@ -417,7 +416,6 @@ static inline void
 tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
 			   const struct tcf_proto *tp)
 {
-	cls_common->handle = tp->q->handle;
 	cls_common->chain_index = tp->chain->index;
 	cls_common->protocol = tp->protocol;
 	cls_common->prio = tp->prio;
-- 
cgit v1.2.3


From 7b06e8aed283081010596c98a67f06c595affe51 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 9 Aug 2017 14:30:35 +0200
Subject: net: sched: remove cops->tcf_cl_offload

cops->tcf_cl_offload is no longer needed, as the drivers check what they
can and cannot offload using the classid identify helpers. So remove this.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     | 14 +++-----------
 include/net/sch_generic.h |  1 -
 net/sched/cls_bpf.c       |  4 ++--
 net/sched/cls_flower.c    |  8 ++++----
 net/sched/cls_matchall.c  |  4 ++--
 net/sched/cls_u32.c       |  8 ++++----
 net/sched/sch_ingress.c   | 12 ------------
 7 files changed, 15 insertions(+), 36 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bd9dd79357fe..e80edd8879ef 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -457,19 +457,12 @@ struct tc_cls_u32_offload {
 	};
 };
 
-static inline bool tc_can_offload(const struct net_device *dev,
-				  const struct tcf_proto *tp)
+static inline bool tc_can_offload(const struct net_device *dev)
 {
-	const struct Qdisc *sch = tp->q;
-	const struct Qdisc_class_ops *cops = sch->ops->cl_ops;
-
 	if (!(dev->features & NETIF_F_HW_TC))
 		return false;
 	if (!dev->netdev_ops->ndo_setup_tc)
 		return false;
-	if (cops && cops->tcf_cl_offload)
-		return cops->tcf_cl_offload(tp->classid);
-
 	return true;
 }
 
@@ -478,12 +471,11 @@ static inline bool tc_skip_hw(u32 flags)
 	return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false;
 }
 
-static inline bool tc_should_offload(const struct net_device *dev,
-				     const struct tcf_proto *tp, u32 flags)
+static inline bool tc_should_offload(const struct net_device *dev, u32 flags)
 {
 	if (tc_skip_hw(flags))
 		return false;
-	return tc_can_offload(dev, tp);
+	return tc_can_offload(dev);
 }
 
 static inline bool tc_skip_sw(u32 flags)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e79f5ad1c5f3..5865db91976b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -156,7 +156,6 @@ struct Qdisc_class_ops {
 
 	/* Filter manipulation */
 	struct tcf_block *	(*tcf_block)(struct Qdisc *, unsigned long);
-	bool			(*tcf_cl_offload)(u32 classid);
 	unsigned long		(*bind_tcf)(struct Qdisc *, unsigned long,
 					u32 classid);
 	void			(*unbind_tcf)(struct Qdisc *, unsigned long);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index db17b68df94e..6f2dffe30f25 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -178,7 +178,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 		(oldprog && tc_skip_sw(oldprog->gen_flags));
 
 	if (oldprog && oldprog->offloaded) {
-		if (tc_should_offload(dev, tp, prog->gen_flags)) {
+		if (tc_should_offload(dev, prog->gen_flags)) {
 			cmd = TC_CLSBPF_REPLACE;
 		} else if (!tc_skip_sw(prog->gen_flags)) {
 			obj = oldprog;
@@ -187,7 +187,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			return -EINVAL;
 		}
 	} else {
-		if (!tc_should_offload(dev, tp, prog->gen_flags))
+		if (!tc_should_offload(dev, prog->gen_flags))
 			return skip_sw ? -EINVAL : 0;
 		cmd = TC_CLSBPF_ADD;
 	}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index d2551a03c542..052e902dc71c 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -227,7 +227,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 	struct tc_cls_flower_offload cls_flower = {};
 	struct net_device *dev = f->hw_dev;
 
-	if (!tc_can_offload(dev, tp))
+	if (!tc_can_offload(dev))
 		return;
 
 	tc_cls_common_offload_init(&cls_flower.common, tp);
@@ -246,9 +246,9 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 	struct tc_cls_flower_offload cls_flower = {};
 	int err;
 
-	if (!tc_can_offload(dev, tp)) {
+	if (!tc_can_offload(dev)) {
 		if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
-		    (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) {
+		    (f->hw_dev && !tc_can_offload(f->hw_dev))) {
 			f->hw_dev = dev;
 			return tc_skip_sw(f->flags) ? -EINVAL : 0;
 		}
@@ -281,7 +281,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 	struct tc_cls_flower_offload cls_flower = {};
 	struct net_device *dev = f->hw_dev;
 
-	if (!tc_can_offload(dev, tp))
+	if (!tc_can_offload(dev))
 		return;
 
 	tc_cls_common_offload_init(&cls_flower.common, tp);
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index d44e26fdae84..d4dc387f7a56 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -92,7 +92,7 @@ static void mall_destroy(struct tcf_proto *tp)
 	if (!head)
 		return;
 
-	if (tc_should_offload(dev, tp, head->flags))
+	if (tc_should_offload(dev, head->flags))
 		mall_destroy_hw_filter(tp, head, (unsigned long) head);
 
 	call_rcu(&head->rcu, mall_destroy_rcu);
@@ -172,7 +172,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 	if (err)
 		goto err_set_parms;
 
-	if (tc_should_offload(dev, tp, flags)) {
+	if (tc_should_offload(dev, flags)) {
 		err = mall_replace_hw_filter(tp, new, (unsigned long) new);
 		if (err) {
 			if (tc_skip_sw(flags))
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 5a3f78181526..af22742d2847 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -433,7 +433,7 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
 	struct net_device *dev = tp->q->dev_queue->dev;
 	struct tc_cls_u32_offload cls_u32 = {};
 
-	if (!tc_should_offload(dev, tp, 0))
+	if (!tc_should_offload(dev, 0))
 		return;
 
 	tc_cls_common_offload_init(&cls_u32.common, tp);
@@ -450,7 +450,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 	struct tc_cls_u32_offload cls_u32 = {};
 	int err;
 
-	if (!tc_should_offload(dev, tp, flags))
+	if (!tc_should_offload(dev, flags))
 		return tc_skip_sw(flags) ? -EINVAL : 0;
 
 	tc_cls_common_offload_init(&cls_u32.common, tp);
@@ -471,7 +471,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
 	struct net_device *dev = tp->q->dev_queue->dev;
 	struct tc_cls_u32_offload cls_u32 = {};
 
-	if (!tc_should_offload(dev, tp, 0))
+	if (!tc_should_offload(dev, 0))
 		return;
 
 	tc_cls_common_offload_init(&cls_u32.common, tp);
@@ -490,7 +490,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 	struct tc_cls_u32_offload cls_u32 = {};
 	int err;
 
-	if (!tc_should_offload(dev, tp, flags))
+	if (!tc_should_offload(dev, flags))
 		return tc_skip_sw(flags) ? -EINVAL : 0;
 
 	tc_cls_common_offload_init(&cls_u32.common, tp);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index d8a9bebcab90..a15c543c3569 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -32,11 +32,6 @@ static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
 	return TC_H_MIN(classid) + 1;
 }
 
-static bool ingress_cl_offload(u32 classid)
-{
-	return true;
-}
-
 static unsigned long ingress_bind_filter(struct Qdisc *sch,
 					 unsigned long parent, u32 classid)
 {
@@ -103,7 +98,6 @@ static const struct Qdisc_class_ops ingress_class_ops = {
 	.put		=	ingress_put,
 	.walk		=	ingress_walk,
 	.tcf_block	=	ingress_tcf_block,
-	.tcf_cl_offload	=	ingress_cl_offload,
 	.bind_tcf	=	ingress_bind_filter,
 	.unbind_tcf	=	ingress_put,
 };
@@ -134,11 +128,6 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
 	}
 }
 
-static bool clsact_cl_offload(u32 classid)
-{
-	return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS);
-}
-
 static unsigned long clsact_bind_filter(struct Qdisc *sch,
 					unsigned long parent, u32 classid)
 {
@@ -198,7 +187,6 @@ static const struct Qdisc_class_ops clsact_class_ops = {
 	.put		=	ingress_put,
 	.walk		=	ingress_walk,
 	.tcf_block	=	clsact_tcf_block,
-	.tcf_cl_offload	=	clsact_cl_offload,
 	.bind_tcf	=	clsact_bind_filter,
 	.unbind_tcf	=	ingress_put,
 };
-- 
cgit v1.2.3


From fe4007999599c02598c17b643e8de43e487d48e8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 15 Aug 2017 09:09:49 +0200
Subject: ipv6: fib: Provide offload indication using nexthop flags

IPv6 routes currently lack nexthop flags as in IPv4. This has several
implications.

In the forwarding path, it requires us to check the carrier state of the
nexthop device and potentially ignore a linkdown route, instead of
checking for RTNH_F_LINKDOWN.

It also requires capable drivers to use the user facing IPv6-specific
route flags to provide offload indication, instead of using the nexthop
flags as in IPv4.

Add nexthop flags to IPv6 routes in the 40 bytes hole and use it to
provide offload indication instead of the RTF_OFFLOAD flag, which is
removed while it's still not part of any official kernel release.

In the near future we would like to use the field for the
RTNH_F_{LINKDOWN,DEAD} flags, but this change is more involved and might
not be ready in time for the current cycle.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 8 ++++----
 include/net/ip6_fib.h                                 | 2 ++
 include/uapi/linux/ipv6_route.h                       | 1 -
 net/ipv6/route.c                                      | 7 +------
 4 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 16676fffbf70..4895d5b8942b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2397,7 +2397,7 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 
 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
-				 list)->rt->rt6i_flags |= RTF_OFFLOAD;
+				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
 		return;
 	}
 
@@ -2407,9 +2407,9 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
 
 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
 		if (nh && nh->offloaded)
-			mlxsw_sp_rt6->rt->rt6i_flags |= RTF_OFFLOAD;
+			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
 		else
-			mlxsw_sp_rt6->rt->rt6i_flags &= ~RTF_OFFLOAD;
+			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
@@ -2424,7 +2424,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
 
-		rt->rt6i_flags &= ~RTF_OFFLOAD;
+		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
 	}
 }
 
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1d790ea40ea7..71c1646298ae 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -120,6 +120,8 @@ struct rt6_info {
 
 	atomic_t			rt6i_ref;
 
+	unsigned int			rt6i_nh_flags;
+
 	/* These are in a separate cache line. */
 	struct rt6key			rt6i_dst ____cacheline_aligned_in_smp;
 	u32				rt6i_flags;
diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h
index 33e2a5732bd1..d496c02e14bc 100644
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -35,7 +35,6 @@
 #define RTF_PREF(pref)	((pref) << 27)
 #define RTF_PREF_MASK	0x18000000
 
-#define RTF_OFFLOAD	0x20000000	/* offloaded route		*/
 #define RTF_PCPU	0x40000000	/* read-only: can not be set by user */
 #define RTF_LOCAL	0x80000000
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 035762fed07d..6793135d49db 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1820,11 +1820,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		goto out;
 	}
 
-	if (cfg->fc_flags & RTF_OFFLOAD) {
-		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD");
-		goto out;
-	}
-
 	if (cfg->fc_dst_len > 128) {
 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
 		goto out;
@@ -3335,7 +3330,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 			goto nla_put_failure;
 	}
 
-	if (rt->rt6i_flags & RTF_OFFLOAD)
+	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
 		*flags |= RTNH_F_OFFLOAD;
 
 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
-- 
cgit v1.2.3


From 9620fef27ed2cdb37bf6fd028f32bea2ef5119a8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 18 Aug 2017 12:08:07 -0700
Subject: ipv4: convert dst_metrics.refcnt from atomic_t to refcount_t

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This allows to avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h        | 3 ++-
 net/core/dst.c           | 6 +++---
 net/ipv4/fib_semantics.c | 4 ++--
 net/ipv4/route.c         | 4 ++--
 4 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index f73611ec4017..93568bd0a352 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -14,6 +14,7 @@
 #include <linux/rcupdate.h>
 #include <linux/bug.h>
 #include <linux/jiffies.h>
+#include <linux/refcount.h>
 #include <net/neighbour.h>
 #include <asm/processor.h>
 
@@ -107,7 +108,7 @@ struct dst_entry {
 
 struct dst_metrics {
 	u32		metrics[RTAX_MAX];
-	atomic_t	refcnt;
+	refcount_t	refcnt;
 };
 extern const struct dst_metrics dst_default_metrics;
 
diff --git a/net/core/dst.c b/net/core/dst.c
index 00aa972ad1a1..d6ead757c258 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -55,7 +55,7 @@ const struct dst_metrics dst_default_metrics = {
 	 * We really want to avoid false sharing on this variable, and catch
 	 * any writes on it.
 	 */
-	.refcnt = ATOMIC_INIT(1),
+	.refcnt = REFCOUNT_INIT(1),
 };
 
 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -213,7 +213,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
 		struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
 		unsigned long prev, new;
 
-		atomic_set(&p->refcnt, 1);
+		refcount_set(&p->refcnt, 1);
 		memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
 
 		new = (unsigned long) p;
@@ -225,7 +225,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
 			if (prev & DST_METRICS_READ_ONLY)
 				p = NULL;
 		} else if (prev & DST_METRICS_REFCOUNTED) {
-			if (atomic_dec_and_test(&old_p->refcnt))
+			if (refcount_dec_and_test(&old_p->refcnt))
 				kfree(old_p);
 		}
 	}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d521caf57385..394d800db50c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -220,7 +220,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
 	} endfor_nexthops(fi);
 
 	m = fi->fib_metrics;
-	if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
 		kfree(m);
 	kfree(fi);
 }
@@ -1090,7 +1090,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 			kfree(fi);
 			return ERR_PTR(err);
 		}
-		atomic_set(&fi->fib_metrics->refcnt, 1);
+		refcount_set(&fi->fib_metrics->refcnt, 1);
 	} else {
 		fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d400c0543106..872b4cb136d3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1398,7 +1398,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
 	struct rtable *rt = (struct rtable *) dst;
 
-	if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
 		kfree(p);
 
 	if (!list_empty(&rt->rt_uncached)) {
@@ -1456,7 +1456,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 		dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
 		if (fi->fib_metrics != &dst_default_metrics) {
 			rt->dst._metrics |= DST_METRICS_REFCOUNTED;
-			atomic_inc(&fi->fib_metrics->refcnt);
+			refcount_inc(&fi->fib_metrics->refcnt);
 		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
-- 
cgit v1.2.3


From 89e49506bc62520f93e64a278293444319a6aebb Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 17 Aug 2017 16:47:00 +0200
Subject: dsa: remove unused net_device arg from handlers

compile tested only, but saw no warnings/errors with
allmodconfig build.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h     | 6 ++----
 net/dsa/dsa.c         | 4 ++--
 net/dsa/tag_brcm.c    | 3 +--
 net/dsa/tag_dsa.c     | 3 +--
 net/dsa/tag_edsa.c    | 3 +--
 net/dsa/tag_ksz.c     | 3 +--
 net/dsa/tag_lan9303.c | 2 +-
 net/dsa/tag_mtk.c     | 3 +--
 net/dsa/tag_qca.c     | 3 +--
 net/dsa/tag_trailer.c | 3 +--
 10 files changed, 12 insertions(+), 21 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 7f46b521313e..398ca8d70ccd 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -104,8 +104,7 @@ struct packet_type;
 struct dsa_device_ops {
 	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
 	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-			       struct packet_type *pt,
-			       struct net_device *orig_dev);
+			       struct packet_type *pt);
 	int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
 			    int *offset);
 };
@@ -134,8 +133,7 @@ struct dsa_switch_tree {
 	/* Copy of tag_ops->rcv for faster access in hot path */
 	struct sk_buff *	(*rcv)(struct sk_buff *skb,
 				       struct net_device *dev,
-				       struct packet_type *pt,
-				       struct net_device *orig_dev);
+				       struct packet_type *pt);
 
 	/*
 	 * The switch port to which the CPU is attached.
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 99e38af85fc5..03c58b0eb082 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -186,7 +186,7 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
 EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
 
 static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
-			  struct packet_type *pt, struct net_device *orig_dev)
+			  struct packet_type *pt, struct net_device *unused)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
 	struct sk_buff *nskb = NULL;
@@ -202,7 +202,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!skb)
 		return 0;
 
-	nskb = dst->rcv(skb, dev, pt, orig_dev);
+	nskb = dst->rcv(skb, dev, pt);
 	if (!nskb) {
 		kfree_skb(skb);
 		return 0;
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index c697d9815177..de74c3f77818 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -89,8 +89,7 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
 }
 
 static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-				    struct packet_type *pt,
-				    struct net_device *orig_dev)
+				    struct packet_type *pt)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
 	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 12867a4b458f..fbf9ca954773 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -65,8 +65,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
-			       struct packet_type *pt,
-			       struct net_device *orig_dev)
+			       struct packet_type *pt)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
 	struct dsa_switch *ds;
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 67a9d26f9075..76367ba1b2e2 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -78,8 +78,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
-				struct packet_type *pt,
-				struct net_device *orig_dev)
+				struct packet_type *pt)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
 	struct dsa_switch *ds;
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index de66ca8e6201..17f30675c15c 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -76,8 +76,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
-			       struct packet_type *pt,
-			       struct net_device *orig_dev)
+			       struct packet_type *pt)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
 	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index e23e7635fa00..0b9826105e42 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -68,7 +68,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
-			struct packet_type *pt, struct net_device *orig_dev)
+			struct packet_type *pt)
 {
 	u16 *lan9303_tag;
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 02163c045a96..ec8ee5f43255 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -44,8 +44,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-				   struct packet_type *pt,
-				   struct net_device *orig_dev)
+				   struct packet_type *pt)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
 	struct dsa_switch *ds;
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1867a3d11f28..1d4c70711c0f 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -63,8 +63,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-				   struct packet_type *pt,
-				   struct net_device *orig_dev)
+				   struct packet_type *pt)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
 	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index b09e56214005..8707157dea32 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -56,8 +56,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
-				   struct packet_type *pt,
-				   struct net_device *orig_dev)
+				   struct packet_type *pt)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
 	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
-- 
cgit v1.2.3


From 7d3f0cd43feea1636dd7746f22fe8249b34d1b79 Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Fri, 18 Aug 2017 15:23:24 +0800
Subject: net: sched: Add the invalid handle check in qdisc_class_find

Add the invalid handle "0" check to avoid unnecessary search, because
the qdisc uses the skb->priority as the handle value to look up, and
it is "0" usually.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5865db91976b..107c52432245 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -393,6 +393,9 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
 	struct Qdisc_class_common *cl;
 	unsigned int h;
 
+	if (!id)
+		return NULL;
+
 	h = qdisc_class_hash(id, hash->hashmask);
 	hlist_for_each_entry(cl, &hash->hash[h], hnode) {
 		if (cl->classid == id)
-- 
cgit v1.2.3


From 111993692741a7044e6c01b428cecf1071de3d0b Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Mon, 21 Aug 2017 23:33:49 -0700
Subject: tcp: Remove the unused parameter for tcp_try_fastopen.

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h       | 3 +--
 net/ipv4/tcp_fastopen.c | 6 ++----
 net/ipv4/tcp_input.c    | 2 +-
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index afdab3781425..a995004ae946 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1533,8 +1533,7 @@ int tcp_fastopen_reset_cipher(void *key, unsigned int len);
 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 			      struct request_sock *req,
-			      struct tcp_fastopen_cookie *foc,
-			      struct dst_entry *dst);
+			      struct tcp_fastopen_cookie *foc);
 void tcp_fastopen_init_key_once(bool publish);
 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 			     struct tcp_fastopen_cookie *cookie);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index ce9c7fef200f..e3c33220c418 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -171,7 +171,6 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
 
 static struct sock *tcp_fastopen_create_child(struct sock *sk,
 					      struct sk_buff *skb,
-					      struct dst_entry *dst,
 					      struct request_sock *req)
 {
 	struct tcp_sock *tp;
@@ -278,8 +277,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
  */
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 			      struct request_sock *req,
-			      struct tcp_fastopen_cookie *foc,
-			      struct dst_entry *dst)
+			      struct tcp_fastopen_cookie *foc)
 {
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
@@ -312,7 +310,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 		 * data in SYN_RECV state.
 		 */
 fastopen:
-		child = tcp_fastopen_create_child(sk, skb, dst, req);
+		child = tcp_fastopen_create_child(sk, skb, req);
 		if (child) {
 			foc->len = -1;
 			NET_INC_STATS(sock_net(sk),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ab908949ee95..d3421ee9a10a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6150,7 +6150,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_openreq_init_rwin(req, sk, dst);
 	if (!want_cookie) {
 		tcp_reqsk_record_syn(sk, req, skb);
-		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
+		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);
 	}
 	if (fastopen_sk) {
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
-- 
cgit v1.2.3


From 84e54fe0a5eaed696dee4019c396f8396f5a908b Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Tue, 22 Aug 2017 09:40:28 -0700
Subject: gre: introduce native tunnel support for ERSPAN

The patch adds ERSPAN type II tunnel support.  The implementation
is based on the draft at [1].  One of the purposes is for Linux
box to be able to receive ERSPAN monitoring traffic sent from
the Cisco switch, by creating a ERSPAN tunnel device.
In addition, the patch also adds ERSPAN TX, so Linux virtual
switch can redirect monitored traffic to the ERSPAN tunnel device.
The traffic will be encapsulated into ERSPAN and sent out.

The implementation reuses tunnel key as ERSPAN session ID, and
field 'erspan' as ERSPAN Index fields:
./ip link add dev ers11 type erspan seq key 100 erspan 123 \
			local 172.16.1.200 remote 172.16.1.100

To use the above device as ERSPAN receiver, configure
Nexus 5000 switch as below:

monitor session 100 type erspan-source
  erspan-id 123
  vrf default
  destination ip 172.16.1.200
  source interface Ethernet1/11 both
  source interface Ethernet1/12 both
  no shut
monitor erspan origin ip-address 172.16.1.100 global

[1] https://tools.ietf.org/html/draft-foschiano-erspan-01
[2] iproute2 patch: http://marc.info/?l=linux-netdev&m=150306086924951&w=2
[3] test script: http://marc.info/?l=linux-netdev&m=150231021807304&w=2

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Meenakshi Vohra <mvohra@vmware.com>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/erspan.h           |  61 ++++++++++
 include/net/ip_tunnels.h       |   3 +
 include/uapi/linux/if_ether.h  |   1 +
 include/uapi/linux/if_tunnel.h |   1 +
 net/ipv4/ip_gre.c              | 269 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 335 insertions(+)
 create mode 100644 include/net/erspan.h

(limited to 'include/net')

diff --git a/include/net/erspan.h b/include/net/erspan.h
new file mode 100644
index 000000000000..ca94fc86865e
--- /dev/null
+++ b/include/net/erspan.h
@@ -0,0 +1,61 @@
+#ifndef __LINUX_ERSPAN_H
+#define __LINUX_ERSPAN_H
+
+/*
+ * GRE header for ERSPAN encapsulation (8 octets [34:41]) -- 8 bytes
+ *       0                   1                   2                   3
+ *      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |0|0|0|1|0|00000|000000000|00000|    Protocol Type for ERSPAN   |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |      Sequence Number (increments per packet per session)      |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ *  Note that in the above GRE header [RFC1701] out of the C, R, K, S,
+ *  s, Recur, Flags, Version fields only S (bit 03) is set to 1. The
+ *  other fields are set to zero, so only a sequence number follows.
+ *
+ *  ERSPAN Type II header (8 octets [42:49])
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  Ver  |          VLAN         | COS | En|T|    Session ID     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |      Reserved         |                  Index                |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
+ */
+
+#define ERSPAN_VERSION	0x1
+
+#define VER_MASK	0xf000
+#define VLAN_MASK	0x0fff
+#define COS_MASK	0xe000
+#define EN_MASK		0x1800
+#define T_MASK		0x0400
+#define ID_MASK		0x03ff
+#define INDEX_MASK	0xfffff
+
+enum erspan_encap_type {
+	ERSPAN_ENCAP_NOVLAN = 0x0,	/* originally without VLAN tag */
+	ERSPAN_ENCAP_ISL = 0x1,		/* originally ISL encapsulated */
+	ERSPAN_ENCAP_8021Q = 0x2,	/* originally 802.1Q encapsulated */
+	ERSPAN_ENCAP_INFRAME = 0x3,	/* VLAN tag perserved in frame */
+};
+
+struct erspan_metadata {
+	__be32 index;   /* type II */
+};
+
+struct erspanhdr {
+	__be16 ver_vlan;
+#define VER_OFFSET  12
+	__be16 session_id;
+#define COS_OFFSET  13
+#define EN_OFFSET   11
+#define T_OFFSET    10
+	struct erspan_metadata md;
+};
+
+#endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 520809912f03..625c29329372 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -115,6 +115,9 @@ struct ip_tunnel {
 	u32		o_seqno;	/* The last output seqno */
 	int		tun_hlen;	/* Precalculated header length */
 
+	/* This field used only by ERSPAN */
+	u32		index;		/* ERSPAN type II index */
+
 	struct dst_cache dst_cache;
 
 	struct ip_tunnel_parm parms;
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 5bc9bfd816b7..efeb1190c2ca 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -66,6 +66,7 @@
 #define ETH_P_ATALK	0x809B		/* Appletalk DDP		*/
 #define ETH_P_AARP	0x80F3		/* Appletalk AARP		*/
 #define ETH_P_8021Q	0x8100          /* 802.1Q VLAN Extended Header  */
+#define ETH_P_ERSPAN	0x88BE		/* ERSPAN type II		*/
 #define ETH_P_IPX	0x8137		/* IPX over DIX			*/
 #define ETH_P_IPV6	0x86DD		/* IPv6 over bluebook		*/
 #define ETH_P_PAUSE	0x8808		/* IEEE Pause frames. See 802.3 31B */
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 6792d1967d31..2e520883c054 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -134,6 +134,7 @@ enum {
 	IFLA_GRE_COLLECT_METADATA,
 	IFLA_GRE_IGNORE_DF,
 	IFLA_GRE_FWMARK,
+	IFLA_GRE_ERSPAN_INDEX,
 	__IFLA_GRE_MAX,
 };
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7a7829e839c2..6e8a62289e03 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -48,6 +48,7 @@
 #include <net/rtnetlink.h>
 #include <net/gre.h>
 #include <net/dst_metadata.h>
+#include <net/erspan.h>
 
 /*
    Problems & solutions
@@ -115,6 +116,7 @@ static int ipgre_tunnel_init(struct net_device *dev);
 
 static unsigned int ipgre_net_id __read_mostly;
 static unsigned int gre_tap_net_id __read_mostly;
+static unsigned int erspan_net_id __read_mostly;
 
 static void ipgre_err(struct sk_buff *skb, u32 info,
 		      const struct tnl_ptk_info *tpi)
@@ -246,6 +248,56 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	ipgre_err(skb, info, &tpi);
 }
 
+static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+		      int gre_hdr_len)
+{
+	struct net *net = dev_net(skb->dev);
+	struct metadata_dst *tun_dst = NULL;
+	struct ip_tunnel_net *itn;
+	struct ip_tunnel *tunnel;
+	struct erspanhdr *ershdr;
+	const struct iphdr *iph;
+	__be32 session_id;
+	__be32 index;
+	int len;
+
+	itn = net_generic(net, erspan_net_id);
+	iph = ip_hdr(skb);
+	len = gre_hdr_len + sizeof(*ershdr);
+
+	if (unlikely(!pskb_may_pull(skb, len)))
+		return -ENOMEM;
+
+	iph = ip_hdr(skb);
+	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
+
+	/* The original GRE header does not have key field,
+	 * Use ERSPAN 10-bit session ID as key.
+	 */
+	session_id = cpu_to_be32(ntohs(ershdr->session_id));
+	tpi->key = session_id;
+	index = ershdr->md.index;
+	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+				  tpi->flags | TUNNEL_KEY,
+				  iph->saddr, iph->daddr, tpi->key);
+
+	if (tunnel) {
+		if (__iptunnel_pull_header(skb,
+					   gre_hdr_len + sizeof(*ershdr),
+					   htons(ETH_P_TEB),
+					   false, false) < 0)
+			goto drop;
+
+		tunnel->index = ntohl(index);
+		skb_reset_mac_header(skb);
+		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+		return PACKET_RCVD;
+	}
+drop:
+	kfree_skb(skb);
+	return PACKET_RCVD;
+}
+
 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
 {
@@ -328,6 +380,11 @@ static int gre_rcv(struct sk_buff *skb)
 	if (hdr_len < 0)
 		goto drop;
 
+	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
+		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+			return 0;
+	}
+
 	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 		return 0;
 
@@ -503,6 +560,81 @@ free_skb:
 	return NETDEV_TX_OK;
 }
 
+static inline u8 tos_to_cos(u8 tos)
+{
+	u8 dscp, cos;
+
+	dscp = tos >> 2;
+	cos = dscp >> 3;
+	return cos;
+}
+
+static void erspan_build_header(struct sk_buff *skb,
+				__be32 id, u32 index, bool truncate)
+{
+	struct iphdr *iphdr = ip_hdr(skb);
+	struct ethhdr *eth = eth_hdr(skb);
+	enum erspan_encap_type enc_type;
+	struct erspanhdr *ershdr;
+	struct qtag_prefix {
+		__be16 eth_type;
+		__be16 tci;
+	} *qp;
+	u16 vlan_tci = 0;
+
+	enc_type = ERSPAN_ENCAP_NOVLAN;
+
+	/* If mirrored packet has vlan tag, extract tci and
+	 *  perserve vlan header in the mirrored frame.
+	 */
+	if (eth->h_proto == htons(ETH_P_8021Q)) {
+		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+		vlan_tci = ntohs(qp->tci);
+		enc_type = ERSPAN_ENCAP_INFRAME;
+	}
+
+	skb_push(skb, sizeof(*ershdr));
+	ershdr = (struct erspanhdr *)skb->data;
+	memset(ershdr, 0, sizeof(*ershdr));
+
+	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
+				 (ERSPAN_VERSION << VER_OFFSET));
+	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
+			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
+			   (enc_type << EN_OFFSET & EN_MASK) |
+			   ((truncate << T_OFFSET) & T_MASK));
+	ershdr->md.index = htonl(index & INDEX_MASK);
+}
+
+static netdev_tx_t erspan_xmit(struct sk_buff *skb,
+			       struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	bool truncate = false;
+
+	if (gre_handle_offloads(skb, false))
+		goto free_skb;
+
+	if (skb_cow_head(skb, dev->needed_headroom))
+		goto free_skb;
+
+	if (skb->len > dev->mtu) {
+		pskb_trim(skb, dev->mtu);
+		truncate = true;
+	}
+
+	/* Push ERSPAN header */
+	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
+	tunnel->parms.o_flags &= ~TUNNEL_KEY;
+	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
+	return NETDEV_TX_OK;
+
+free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+	return NETDEV_TX_OK;
+}
+
 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
 				struct net_device *dev)
 {
@@ -828,6 +960,39 @@ out:
 	return ipgre_tunnel_validate(tb, data, extack);
 }
 
+static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
+			   struct netlink_ext_ack *extack)
+{
+	__be16 flags = 0;
+	int ret;
+
+	if (!data)
+		return 0;
+
+	ret = ipgre_tap_validate(tb, data, extack);
+	if (ret)
+		return ret;
+
+	/* ERSPAN should only have GRE sequence and key flag */
+	flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (flags != (GRE_SEQ | GRE_KEY))
+		return -EINVAL;
+
+	/* ERSPAN Session ID only has 10-bit. Since we reuse
+	 * 32-bit key field as ID, check it's range.
+	 */
+	if (data[IFLA_GRE_IKEY] &&
+	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+		return -EINVAL;
+
+	if (data[IFLA_GRE_OKEY] &&
+	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int ipgre_netlink_parms(struct net_device *dev,
 				struct nlattr *data[],
 				struct nlattr *tb[],
@@ -892,6 +1057,13 @@ static int ipgre_netlink_parms(struct net_device *dev,
 	if (data[IFLA_GRE_FWMARK])
 		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
 
+	if (data[IFLA_GRE_ERSPAN_INDEX]) {
+		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+		if (t->index & ~INDEX_MASK)
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -949,6 +1121,36 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
 };
 
+static int erspan_tunnel_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	int t_hlen;
+
+	tunnel->tun_hlen = 8;
+	tunnel->parms.iph.protocol = IPPROTO_GRE;
+	t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr);
+
+	dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
+	dev->mtu = ETH_DATA_LEN - t_hlen - 4;
+	dev->features		|= GRE_FEATURES;
+	dev->hw_features	|= GRE_FEATURES;
+	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
+
+	return ip_tunnel_init(dev);
+}
+
+static const struct net_device_ops erspan_netdev_ops = {
+	.ndo_init		= erspan_tunnel_init,
+	.ndo_uninit		= ip_tunnel_uninit,
+	.ndo_start_xmit		= erspan_xmit,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= ip_tunnel_change_mtu,
+	.ndo_get_stats64	= ip_tunnel_get_stats64,
+	.ndo_get_iflink		= ip_tunnel_get_iflink,
+	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
+};
+
 static void ipgre_tap_setup(struct net_device *dev)
 {
 	ether_setup(dev);
@@ -1041,6 +1243,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
 		nla_total_size(1) +
 		/* IFLA_GRE_FWMARK */
 		nla_total_size(4) +
+		/* IFLA_GRE_ERSPAN_INDEX */
+		nla_total_size(4) +
 		0;
 }
 
@@ -1083,12 +1287,25 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			goto nla_put_failure;
 	}
 
+	if (t->index)
+		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+			goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static void erspan_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+	dev->netdev_ops = &erspan_netdev_ops;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	ip_tunnel_setup(dev, erspan_net_id);
+}
+
 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
@@ -1107,6 +1324,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
 	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
 	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
+	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
 };
 
 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
@@ -1139,6 +1357,21 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
 	.get_link_net	= ip_tunnel_get_link_net,
 };
 
+static struct rtnl_link_ops erspan_link_ops __read_mostly = {
+	.kind		= "erspan",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ipgre_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= erspan_setup,
+	.validate	= erspan_validate,
+	.newlink	= ipgre_newlink,
+	.changelink	= ipgre_changelink,
+	.dellink	= ip_tunnel_dellink,
+	.get_size	= ipgre_get_size,
+	.fill_info	= ipgre_fill_info,
+	.get_link_net	= ip_tunnel_get_link_net,
+};
+
 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 					u8 name_assign_type)
 {
@@ -1202,6 +1435,26 @@ static struct pernet_operations ipgre_tap_net_ops = {
 	.size = sizeof(struct ip_tunnel_net),
 };
 
+static int __net_init erspan_init_net(struct net *net)
+{
+	return ip_tunnel_init_net(net, erspan_net_id,
+				  &erspan_link_ops, "erspan0");
+}
+
+static void __net_exit erspan_exit_net(struct net *net)
+{
+	struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);
+
+	ip_tunnel_delete_net(itn, &erspan_link_ops);
+}
+
+static struct pernet_operations erspan_net_ops = {
+	.init = erspan_init_net,
+	.exit = erspan_exit_net,
+	.id   = &erspan_net_id,
+	.size = sizeof(struct ip_tunnel_net),
+};
+
 static int __init ipgre_init(void)
 {
 	int err;
@@ -1216,6 +1469,10 @@ static int __init ipgre_init(void)
 	if (err < 0)
 		goto pnet_tap_faied;
 
+	err = register_pernet_device(&erspan_net_ops);
+	if (err < 0)
+		goto pnet_erspan_failed;
+
 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
 	if (err < 0) {
 		pr_info("%s: can't add protocol\n", __func__);
@@ -1230,13 +1487,21 @@ static int __init ipgre_init(void)
 	if (err < 0)
 		goto tap_ops_failed;
 
+	err = rtnl_link_register(&erspan_link_ops);
+	if (err < 0)
+		goto erspan_link_failed;
+
 	return 0;
 
+erspan_link_failed:
+	rtnl_link_unregister(&ipgre_tap_ops);
 tap_ops_failed:
 	rtnl_link_unregister(&ipgre_link_ops);
 rtnl_link_failed:
 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
 add_proto_failed:
+	unregister_pernet_device(&erspan_net_ops);
+pnet_erspan_failed:
 	unregister_pernet_device(&ipgre_tap_net_ops);
 pnet_tap_faied:
 	unregister_pernet_device(&ipgre_net_ops);
@@ -1247,9 +1512,11 @@ static void __exit ipgre_fini(void)
 {
 	rtnl_link_unregister(&ipgre_tap_ops);
 	rtnl_link_unregister(&ipgre_link_ops);
+	rtnl_link_unregister(&erspan_link_ops);
 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
 	unregister_pernet_device(&ipgre_tap_net_ops);
 	unregister_pernet_device(&ipgre_net_ops);
+	unregister_pernet_device(&erspan_net_ops);
 }
 
 module_init(ipgre_init);
@@ -1257,5 +1524,7 @@ module_exit(ipgre_fini);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_RTNL_LINK("gre");
 MODULE_ALIAS_RTNL_LINK("gretap");
+MODULE_ALIAS_RTNL_LINK("erspan");
 MODULE_ALIAS_NETDEV("gre0");
 MODULE_ALIAS_NETDEV("gretap0");
+MODULE_ALIAS_NETDEV("erspan0");
-- 
cgit v1.2.3


From 98aaa913b4ed250324429f0a9e6d5f77a3b5276c Mon Sep 17 00:00:00 2001
From: Mike Maloney <maloney@google.com>
Date: Tue, 22 Aug 2017 17:08:48 -0400
Subject: tcp: Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg

When SOF_TIMESTAMPING_RX_SOFTWARE is enabled for tcp sockets, return the
timestamp corresponding to the highest sequence number data returned.

Previously the skb->tstamp is overwritten when a TCP packet is placed
in the out of order queue.  While the packet is in the ooo queue, save the
timestamp in the TCB_SKB_CB.  This space is shared with the gso_*
options which are only used on the tx path, and a previously unused 4
byte hole.

When skbs are coalesced either in the sk_receive_queue or the
out_of_order_queue always choose the timestamp of the appended skb to
maintain the invariant of returning the timestamp of the last byte in
the recvmsg buffer.

Signed-off-by: Mike Maloney <maloney@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    |  9 +++++++-
 net/ipv4/tcp.c       | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_input.c | 35 ++++++++++++++++++++++++----
 net/ipv4/tcp_ipv4.c  |  2 ++
 net/ipv6/tcp_ipv6.c  |  2 ++
 5 files changed, 108 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a995004ae946..c614ff135b66 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -774,6 +774,12 @@ struct tcp_skb_cb {
 			u16	tcp_gso_segs;
 			u16	tcp_gso_size;
 		};
+
+		/* Used to stash the receive timestamp while this skb is in the
+		 * out of order queue, as skb->tstamp is overwritten by the
+		 * rbnode.
+		 */
+		ktime_t		swtstamp;
 	};
 	__u8		tcp_flags;	/* TCP header flags. (tcp[13])	*/
 
@@ -790,7 +796,8 @@ struct tcp_skb_cb {
 	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
 	__u8		txstamp_ack:1,	/* Record TX timestamp for ack? */
 			eor:1,		/* Is skb MSG_EOR marked? */
-			unused:6;
+			has_rxtstamp:1,	/* SKB has a RX timestamp	*/
+			unused:5;
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
 	union {
 		struct {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d25e3bcca66b..0cce4472b4a1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -269,6 +269,7 @@
 #include <linux/err.h>
 #include <linux/time.h>
 #include <linux/slab.h>
+#include <linux/errqueue.h>
 
 #include <net/icmp.h>
 #include <net/inet_common.h>
@@ -1695,6 +1696,61 @@ int tcp_peek_len(struct socket *sock)
 }
 EXPORT_SYMBOL(tcp_peek_len);
 
+static void tcp_update_recv_tstamps(struct sk_buff *skb,
+				    struct scm_timestamping *tss)
+{
+	if (skb->tstamp)
+		tss->ts[0] = ktime_to_timespec(skb->tstamp);
+	else
+		tss->ts[0] = (struct timespec) {0};
+
+	if (skb_hwtstamps(skb)->hwtstamp)
+		tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp);
+	else
+		tss->ts[2] = (struct timespec) {0};
+}
+
+/* Similar to __sock_recv_timestamp, but does not require an skb */
+void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+			struct scm_timestamping *tss)
+{
+	struct timeval tv;
+	bool has_timestamping = false;
+
+	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
+		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
+			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
+					 sizeof(tss->ts[0]), &tss->ts[0]);
+			} else {
+				tv.tv_sec = tss->ts[0].tv_sec;
+				tv.tv_usec = tss->ts[0].tv_nsec / 1000;
+
+				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
+					 sizeof(tv), &tv);
+			}
+		}
+
+		if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+			has_timestamping = true;
+		else
+			tss->ts[0] = (struct timespec) {0};
+	}
+
+	if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
+		if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+			has_timestamping = true;
+		else
+			tss->ts[2] = (struct timespec) {0};
+	}
+
+	if (has_timestamping) {
+		tss->ts[1] = (struct timespec) {0};
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING,
+			 sizeof(*tss), tss);
+	}
+}
+
 /*
  *	This routine copies from a sock struct into the user buffer.
  *
@@ -1716,6 +1772,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 	long timeo;
 	struct sk_buff *skb, *last;
 	u32 urg_hole = 0;
+	struct scm_timestamping tss;
+	bool has_tss = false;
 
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
@@ -1911,6 +1969,10 @@ skip_copy:
 		if (used + offset < skb->len)
 			continue;
 
+		if (TCP_SKB_CB(skb)->has_rxtstamp) {
+			tcp_update_recv_tstamps(skb, &tss);
+			has_tss = true;
+		}
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK))
@@ -1929,6 +1991,9 @@ skip_copy:
 	 * on connected socket. I was just happy when found this 8) --ANK
 	 */
 
+	if (has_tss)
+		tcp_recv_timestamp(msg, sk, &tss);
+
 	/* Clean up data we have read: This will do ACK frames. */
 	tcp_cleanup_rbuf(sk, copied);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d3421ee9a10a..568ccfd6dd37 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4246,9 +4246,15 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	tp->rx_opt.num_sacks = num_sacks;
 }
 
+enum tcp_queue {
+	OOO_QUEUE,
+	RCV_QUEUE,
+};
+
 /**
  * tcp_try_coalesce - try to merge skb to prior one
  * @sk: socket
+ * @dest: destination queue
  * @to: prior buffer
  * @from: buffer to add in queue
  * @fragstolen: pointer to boolean
@@ -4260,6 +4266,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
  * Returns true if caller should free @from instead of queueing it
  */
 static bool tcp_try_coalesce(struct sock *sk,
+			     enum tcp_queue dest,
 			     struct sk_buff *to,
 			     struct sk_buff *from,
 			     bool *fragstolen)
@@ -4281,6 +4288,15 @@ static bool tcp_try_coalesce(struct sock *sk,
 	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
 	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
 	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
+
+	if (TCP_SKB_CB(from)->has_rxtstamp) {
+		TCP_SKB_CB(to)->has_rxtstamp = true;
+		if (dest == OOO_QUEUE)
+			TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
+		else
+			to->tstamp = from->tstamp;
+	}
+
 	return true;
 }
 
@@ -4315,6 +4331,9 @@ static void tcp_ofo_queue(struct sock *sk)
 		}
 		p = rb_next(p);
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
+		/* Replace tstamp which was stomped by rbnode */
+		if (TCP_SKB_CB(skb)->has_rxtstamp)
+			skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
 			SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4326,7 +4345,8 @@ static void tcp_ofo_queue(struct sock *sk)
 			   TCP_SKB_CB(skb)->end_seq);
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
-		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
+		eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
+						 tail, skb, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
 		if (!eaten)
@@ -4380,6 +4400,10 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
+	/* Stash tstamp to avoid being stomped on by rbnode */
+	if (TCP_SKB_CB(skb)->has_rxtstamp)
+		TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
+
 	inet_csk_schedule_ack(sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
@@ -4405,7 +4429,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+	if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+			     skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
@@ -4455,7 +4480,8 @@ coalesce_done:
 				__kfree_skb(skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+		} else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+					    skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;
@@ -4506,7 +4532,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
-		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
+		 tcp_try_coalesce(sk, RCV_QUEUE, tail,
+				  skb, fragstolen)) ? 1 : 0;
 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5af8b809dfbc..a63486afa7a7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1637,6 +1637,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
 	TCP_SKB_CB(skb)->sacked	 = 0;
+	TCP_SKB_CB(skb)->has_rxtstamp =
+			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
 
 lookup:
 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d79a1af3252e..abba3bc2a3d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1394,6 +1394,8 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
 	TCP_SKB_CB(skb)->sacked = 0;
+	TCP_SKB_CB(skb)->has_rxtstamp =
+			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
 }
 
 static int tcp_v6_rcv(struct sk_buff *skb)
-- 
cgit v1.2.3


From f9cbe9a556afca9e82df9aebe4412d93769566b5 Mon Sep 17 00:00:00 2001
From: Antoine Ténart <antoine.tenart@free-electrons.com>
Date: Wed, 23 Aug 2017 09:46:54 +0200
Subject: net: define the TSO header size in net/tso.h

The TSO header size was defined in many drivers. Factorize the code and
define its size in net/tso.h.

Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 1 -
 drivers/net/ethernet/freescale/fec_main.c          | 1 -
 drivers/net/ethernet/marvell/mv643xx_eth.c         | 2 --
 drivers/net/ethernet/marvell/mvneta.c              | 3 ---
 include/net/tso.h                                  | 2 ++
 5 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 57858522c33c..67d1a3230773 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -277,7 +277,6 @@ struct snd_queue {
 	u16		xdp_free_cnt;
 	bool		is_xdp;
 
-#define	TSO_HEADER_SIZE	128
 	/* For TSO segment's header */
 	char		*tso_hdrs;
 	dma_addr_t	tso_hdrs_phys;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index df09b254553d..56f56d6ada9c 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -226,7 +226,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 
 #define COPYBREAK_DEFAULT	256
 
-#define TSO_HEADER_SIZE		128
 /* Max number of allowed TCP segments for software TSO */
 #define FEC_MAX_TSO_SEGS	100
 #define FEC_MAX_SKB_DESCS	(FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 9c94ea9b2b80..fb2d533ae4ef 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -183,8 +183,6 @@ static char mv643xx_eth_driver_version[] = "1.4";
 #define DEFAULT_TX_QUEUE_SIZE	512
 #define SKB_DMA_REALIGN		((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES)
 
-#define TSO_HEADER_SIZE		128
-
 /* Max number of allowed TCP segments for software TSO */
 #define MV643XX_MAX_TSO_SEGS 100
 #define MV643XX_MAX_SKB_DESCS (MV643XX_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 0aab74c2a209..35ff1ecfcff0 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -281,9 +281,6 @@
  */
 #define MVNETA_RSS_LU_TABLE_SIZE	1
 
-/* TSO header size */
-#define TSO_HEADER_SIZE 128
-
 /* Max number of Rx descriptors */
 #define MVNETA_MAX_RXD 128
 
diff --git a/include/net/tso.h b/include/net/tso.h
index b7be852bfe9d..9a56c39e6d0a 100644
--- a/include/net/tso.h
+++ b/include/net/tso.h
@@ -3,6 +3,8 @@
 
 #include <net/ip.h>
 
+#define TSO_HEADER_SIZE		128
+
 struct tso_t {
 	int next_frag_idx;
 	void *data;
-- 
cgit v1.2.3


From e457d86ada27cbd2f46ded75d4b4bc06e26d0e2e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 23 Aug 2017 10:08:19 +0200
Subject: net: sched: add couple of goto_chain helpers

Add helpers to find out if a gact instance is goto_chain termination
action and to get chain index.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_gact.h | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index d576374c4d6f..41afe1ce7b16 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -15,7 +15,8 @@ struct tcf_gact {
 };
 #define to_gact(a) ((struct tcf_gact *)a)
 
-static inline bool __is_tcf_gact_act(const struct tc_action *a, int act)
+static inline bool __is_tcf_gact_act(const struct tc_action *a, int act,
+				     bool is_ext)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_gact *gact;
@@ -24,7 +25,8 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act)
 		return false;
 
 	gact = to_gact(a);
-	if (gact->tcf_action == act)
+	if ((!is_ext && gact->tcf_action == act) ||
+	    (is_ext && TC_ACT_EXT_CMP(gact->tcf_action, act)))
 		return true;
 
 #endif
@@ -33,12 +35,22 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act)
 
 static inline bool is_tcf_gact_shot(const struct tc_action *a)
 {
-	return __is_tcf_gact_act(a, TC_ACT_SHOT);
+	return __is_tcf_gact_act(a, TC_ACT_SHOT, false);
 }
 
 static inline bool is_tcf_gact_trap(const struct tc_action *a)
 {
-	return __is_tcf_gact_act(a, TC_ACT_TRAP);
+	return __is_tcf_gact_act(a, TC_ACT_TRAP, false);
+}
+
+static inline bool is_tcf_gact_goto_chain(const struct tc_action *a)
+{
+	return __is_tcf_gact_act(a, TC_ACT_GOTO_CHAIN, true);
+}
+
+static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a)
+{
+	return a->goto_chain->index;
 }
 
 #endif /* __NET_TC_GACT_H */
-- 
cgit v1.2.3


From 1177009131bee310421f5c04c43d3777cbacbdc8 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 24 Aug 2017 08:39:59 +0200
Subject: devlink: Add Ethernet header for dpipe

This will be used by the IPv4 host table which will be introduced in the
following patches. This header is global and can be reused by many
drivers.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  1 +
 include/uapi/linux/devlink.h |  8 ++++++++
 net/core/devlink.c           | 17 +++++++++++++++++
 3 files changed, 26 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index ed7687bbf5d0..ddb8b5227580 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -328,6 +328,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb,
 			     struct devlink_dpipe_action *action);
 int devlink_dpipe_match_put(struct sk_buff *skb,
 			    struct devlink_dpipe_match *match);
+extern struct devlink_dpipe_header devlink_dpipe_header_ethernet;
 
 #else
 
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index b0e807ac53bb..a855f8dcc8ee 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -226,4 +226,12 @@ enum devlink_dpipe_action_type {
 	DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY,
 };
 
+enum devlink_dpipe_field_ethernet_id {
+	DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+};
+
+enum devlink_dpipe_header_id {
+	DEVLINK_DPIPE_HEADER_ETHERNET,
+};
+
 #endif /* _UAPI_LINUX_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a0adfc31a3fe..4f6f3d601785 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -29,6 +29,23 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/devlink.h>
 
+static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
+	{
+		.name = "destination_mac",
+		.id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+		.bitwidth = 48,
+	},
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
+	.name = "ethernet",
+	.id = DEVLINK_DPIPE_HEADER_ETHERNET,
+	.fields = devlink_dpipe_fields_ethernet,
+	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
+	.global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ethernet);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 
 static LIST_HEAD(devlink_list);
-- 
cgit v1.2.3


From 3fb886ecea93605a8ea14e258ff3158b8966781e Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 24 Aug 2017 08:40:00 +0200
Subject: devlink: Add IPv4 header for dpipe

This will be used by the IPv4 host table which will be introduced in the
following patches. This header is global and can be reused by many
drivers.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  1 +
 include/uapi/linux/devlink.h |  5 +++++
 net/core/devlink.c           | 17 +++++++++++++++++
 3 files changed, 23 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index ddb8b5227580..8ff8a6f77f29 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -329,6 +329,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb,
 int devlink_dpipe_match_put(struct sk_buff *skb,
 			    struct devlink_dpipe_match *match);
 extern struct devlink_dpipe_header devlink_dpipe_header_ethernet;
+extern struct devlink_dpipe_header devlink_dpipe_header_ipv4;
 
 #else
 
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index a855f8dcc8ee..6c172548589d 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -230,8 +230,13 @@ enum devlink_dpipe_field_ethernet_id {
 	DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
 };
 
+enum devlink_dpipe_field_ipv4_id {
+	DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+};
+
 enum devlink_dpipe_header_id {
 	DEVLINK_DPIPE_HEADER_ETHERNET,
+	DEVLINK_DPIPE_HEADER_IPV4,
 };
 
 #endif /* _UAPI_LINUX_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4f6f3d601785..fcf5dd662882 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -46,6 +46,23 @@ struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
 };
 EXPORT_SYMBOL(devlink_dpipe_header_ethernet);
 
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
+	{
+		.name = "destination ip",
+		.id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+		.bitwidth = 32,
+	},
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
+	.name = "ipv4",
+	.id = DEVLINK_DPIPE_HEADER_IPV4,
+	.fields = devlink_dpipe_fields_ipv4,
+	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
+	.global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ipv4);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 
 static LIST_HEAD(devlink_list);
-- 
cgit v1.2.3


From ffd3cdccf214cf0df08856a6738544076c4cd548 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 24 Aug 2017 08:40:02 +0200
Subject: devlink: Add support for dynamic table size

Up until now the dpipe table's size was static and known at registration
time. The host table does not have constant size and it is resized in
dynamic manner. In order to support this behavior the size is changed
to be obtained dynamically via an op.

This patch also adjust the current dpipe table for the new API.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 13 +++++++++----
 include/net/devlink.h                                |  7 +++----
 net/core/devlink.c                                   | 12 +++++++-----
 3 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index e9e1f7aeb04f..9eb265bd5430 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -301,24 +301,29 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
 	return 0;
 }
 
+static u64 mlxsw_sp_dpipe_table_erif_size_get(void *priv)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	return MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+}
+
 static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = {
 	.matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump,
 	.actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump,
 	.entries_dump = mlxsw_sp_dpipe_table_erif_entries_dump,
 	.counters_set_update = mlxsw_sp_dpipe_table_erif_counters_update,
+	.size_get = mlxsw_sp_dpipe_table_erif_size_get,
 };
 
 static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
-	u64 table_size;
 
-	table_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
 	return devlink_dpipe_table_register(devlink,
 					    MLXSW_SP_DPIPE_TABLE_NAME_ERIF,
 					    &mlxsw_sp_erif_ops,
-					    mlxsw_sp, table_size,
-					    false);
+					    mlxsw_sp, false);
 }
 
 static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp)
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 8ff8a6f77f29..e96272b2cfec 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -178,7 +178,6 @@ struct devlink_dpipe_table_ops;
  * struct devlink_dpipe_table - table object
  * @priv: private
  * @name: table name
- * @size: maximum number of entries
  * @counters_enabled: indicates if counters are active
  * @counter_control_extern: indicates if counter control is in dpipe or
  *			    external tool
@@ -189,7 +188,6 @@ struct devlink_dpipe_table {
 	void *priv;
 	struct list_head list;
 	const char *name;
-	u64 size;
 	bool counters_enabled;
 	bool counter_control_extern;
 	struct devlink_dpipe_table_ops *table_ops;
@@ -204,6 +202,7 @@ struct devlink_dpipe_table {
  * @counters_set_update - when changing the counter status hardware sync
  *			  maybe needed to allocate/free counter related
  *			  resources
+ * @size_get - get size
  */
 struct devlink_dpipe_table_ops {
 	int (*actions_dump)(void *priv, struct sk_buff *skb);
@@ -211,6 +210,7 @@ struct devlink_dpipe_table_ops {
 	int (*entries_dump)(void *priv, bool counters_enabled,
 			    struct devlink_dpipe_dump_ctx *dump_ctx);
 	int (*counters_set_update)(void *priv, bool enable);
+	u64 (*size_get)(void *priv);
 };
 
 /**
@@ -311,8 +311,7 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index);
 int devlink_dpipe_table_register(struct devlink *devlink,
 				 const char *table_name,
 				 struct devlink_dpipe_table_ops *table_ops,
-				 void *priv, u64 size,
-				 bool counter_control_extern);
+				 void *priv, bool counter_control_extern);
 void devlink_dpipe_table_unregister(struct devlink *devlink,
 				    const char *table_name);
 int devlink_dpipe_headers_register(struct devlink *devlink,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index fcf5dd662882..87062ff36ab4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1647,13 +1647,15 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
 				   struct devlink_dpipe_table *table)
 {
 	struct nlattr *table_attr;
+	u64 table_size;
 
+	table_size = table->table_ops->size_get(table->priv);
 	table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
 	if (!table_attr)
 		return -EMSGSIZE;
 
 	if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
-	    nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size,
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
 			      DEVLINK_ATTR_PAD))
 		goto nla_put_failure;
 	if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
@@ -2718,20 +2720,21 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
  *	@table_name: table name
  *	@table_ops: table ops
  *	@priv: priv
- *	@size: size
  *	@counter_control_extern: external control for counters
  */
 int devlink_dpipe_table_register(struct devlink *devlink,
 				 const char *table_name,
 				 struct devlink_dpipe_table_ops *table_ops,
-				 void *priv, u64 size,
-				 bool counter_control_extern)
+				 void *priv, bool counter_control_extern)
 {
 	struct devlink_dpipe_table *table;
 
 	if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
 		return -EEXIST;
 
+	if (WARN_ON(!table_ops->size_get))
+		return -EINVAL;
+
 	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
@@ -2739,7 +2742,6 @@ int devlink_dpipe_table_register(struct devlink *devlink,
 	table->name = table_name;
 	table->table_ops = table_ops;
 	table->priv = priv;
-	table->size = size;
 	table->counter_control_extern = counter_control_extern;
 
 	mutex_lock(&devlink_mutex);
-- 
cgit v1.2.3


From 3580732448f128c39e7325912bc4368ade5dce7d Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 24 Aug 2017 08:40:03 +0200
Subject: devlink: Move dpipe entry clear function into devlink

The entry clear routine can be shared between the drivers, thus it is
moved inside devlink.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_dpipe.c   | 24 ++--------------------
 include/net/devlink.h                              |  6 ++++++
 net/core/devlink.c                                 | 22 ++++++++++++++++++++
 3 files changed, 30 insertions(+), 22 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 9eb265bd5430..1305df9c6bf9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -114,26 +114,6 @@ static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv,
 	return devlink_dpipe_match_put(skb, &match);
 }
 
-static void mlxsw_sp_erif_entry_clear(struct devlink_dpipe_entry *entry)
-{
-	unsigned int value_count, value_index;
-	struct devlink_dpipe_value *value;
-
-	value = entry->action_values;
-	value_count = entry->action_values_count;
-	for (value_index = 0; value_index < value_count; value_index++) {
-		kfree(value[value_index].value);
-		kfree(value[value_index].mask);
-	}
-
-	value = entry->match_values;
-	value_count = entry->match_values_count;
-	for (value_index = 0; value_index < value_count; value_index++) {
-		kfree(value[value_index].value);
-		kfree(value[value_index].mask);
-	}
-}
-
 static void
 mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match,
 				   struct devlink_dpipe_action *action)
@@ -270,12 +250,12 @@ start_again:
 		goto start_again;
 	rtnl_unlock();
 
-	mlxsw_sp_erif_entry_clear(&entry);
+	devlink_dpipe_entry_clear(&entry);
 	return 0;
 err_entry_append:
 err_entry_get:
 	rtnl_unlock();
-	mlxsw_sp_erif_entry_clear(&entry);
+	devlink_dpipe_entry_clear(&entry);
 	return err;
 }
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index e96272b2cfec..047a0c54f652 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -323,6 +323,7 @@ int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx);
 int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
 				   struct devlink_dpipe_entry *entry);
 int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx);
+void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry);
 int devlink_dpipe_action_put(struct sk_buff *skb,
 			     struct devlink_dpipe_action *action);
 int devlink_dpipe_match_put(struct sk_buff *skb,
@@ -448,6 +449,11 @@ devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
 	return 0;
 }
 
+static inline void
+devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
+{
+}
+
 static inline int
 devlink_dpipe_action_put(struct sk_buff *skb,
 			 struct devlink_dpipe_action *action)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 87062ff36ab4..194708aa5f11 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1996,6 +1996,28 @@ int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
 }
 EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
 
+void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
+
+{
+	unsigned int value_count, value_index;
+	struct devlink_dpipe_value *value;
+
+	value = entry->action_values;
+	value_count = entry->action_values_count;
+	for (value_index = 0; value_index < value_count; value_index++) {
+		kfree(value[value_index].value);
+		kfree(value[value_index].mask);
+	}
+
+	value = entry->match_values;
+	value_count = entry->match_values_count;
+	for (value_index = 0; value_index < value_count; value_index++) {
+		kfree(value[value_index].value);
+		kfree(value[value_index].mask);
+	}
+}
+EXPORT_SYMBOL(devlink_dpipe_entry_clear);
+
 static int devlink_dpipe_entries_fill(struct genl_info *info,
 				      enum devlink_command cmd, int flags,
 				      struct devlink_dpipe_table *table)
-- 
cgit v1.2.3


From 0d03510038bda70b5a4a252e8216822e6ce0cbdb Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 12 Aug 2017 00:57:02 +0200
Subject: netfilter: conntrack: compute l3proto nla size at compile time

avoids a pointer and allows struct to be const later on.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h   | 19 ++++++++-----------
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 13 +++++++------
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 14 ++++++++------
 net/netfilter/nf_conntrack_netlink.c           |  3 ++-
 net/netfilter/nf_conntrack_proto.c             |  9 +++------
 5 files changed, 28 insertions(+), 30 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 1b8de164d744..6a27ffea7480 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -20,6 +20,9 @@ struct nf_conntrack_l3proto {
 	/* L3 Protocol Family number. ex) PF_INET */
 	u_int16_t l3proto;
 
+	/* size of tuple nlattr, fills a hole */
+	u16 nla_size;
+
 	/* Protocol name */
 	const char *name;
 
@@ -49,23 +52,17 @@ struct nf_conntrack_l3proto {
 	int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
 			   unsigned int *dataoff, u_int8_t *protonum);
 
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	int (*tuple_to_nlattr)(struct sk_buff *skb,
 			       const struct nf_conntrack_tuple *t);
-
-	/* Called when netns wants to use connection tracking */
-	int (*net_ns_get)(struct net *);
-	void (*net_ns_put)(struct net *);
-
-	/*
-	 * Calculate size of tuple nlattr
-	 */
-	int (*nlattr_tuple_size)(void);
-
 	int (*nlattr_to_tuple)(struct nlattr *tb[],
 			       struct nf_conntrack_tuple *t);
 	const struct nla_policy *nla_policy;
+#endif
 
-	size_t nla_size;
+	/* Called when netns wants to use connection tracking */
+	int (*net_ns_get)(struct net *);
+	void (*net_ns_put)(struct net *);
 
 	/* Module (if any) which this is connected to. */
 	struct module *me;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index de5f0e6ddd1b..9fb8cb033d80 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -303,11 +303,6 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
 
 	return 0;
 }
-
-static int ipv4_nlattr_tuple_size(void)
-{
-	return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
-}
 #endif
 
 static struct nf_sockopt_ops so_getorigdst = {
@@ -365,9 +360,10 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
 	.get_l4proto	 = ipv4_get_l4proto,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr = ipv4_tuple_to_nlattr,
-	.nlattr_tuple_size = ipv4_nlattr_tuple_size,
 	.nlattr_to_tuple = ipv4_nlattr_to_tuple,
 	.nla_policy	 = ipv4_nla_policy,
+	.nla_size	 = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
+			   NLA_ALIGN(NLA_HDRLEN + sizeof(u32)),  /* CTA_IP_V4_DST */
 #endif
 	.net_ns_get	 = ipv4_hooks_register,
 	.net_ns_put	 = ipv4_hooks_unregister,
@@ -421,6 +417,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 
 	need_conntrack();
 
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
+	    nf_conntrack_l3proto_ipv4.nla_size))
+		return -EINVAL;
+#endif
 	ret = nf_register_sockopt(&so_getorigdst);
 	if (ret < 0) {
 		pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index ddef5ee9e0a8..6b4d59fd0214 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -308,11 +308,6 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
 
 	return 0;
 }
-
-static int ipv6_nlattr_tuple_size(void)
-{
-	return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
-}
 #endif
 
 static int ipv6_hooks_register(struct net *net)
@@ -360,9 +355,10 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
 	.get_l4proto		= ipv6_get_l4proto,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= ipv6_tuple_to_nlattr,
-	.nlattr_tuple_size	= ipv6_nlattr_tuple_size,
 	.nlattr_to_tuple	= ipv6_nlattr_to_tuple,
 	.nla_policy		= ipv6_nla_policy,
+	.nla_size		= NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
+				  NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
 #endif
 	.net_ns_get		= ipv6_hooks_register,
 	.net_ns_put		= ipv6_hooks_unregister,
@@ -421,6 +417,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 
 	need_conntrack();
 
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) !=
+	    nf_conntrack_l3proto_ipv6.nla_size))
+		return -EINVAL;
+#endif
+
 	ret = nf_register_sockopt(&so_getorigdst6);
 	if (ret < 0) {
 		pr_err("Unable to register netfilter socket option\n");
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f4ca48817f66..b59a453a0fd8 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -540,7 +540,8 @@ static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
 	size_t len = 0;
 
 	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-	len += l3proto->nla_size;
+	len = l3proto->nla_size;
+	len *= 3u; /* ORIG, REPLY, MASTER */
 
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	len += l4proto->nla_size;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 27810cf816a6..85104a27cc89 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -214,10 +214,10 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
 
 	if (proto->l3proto >= NFPROTO_NUMPROTO)
 		return -EBUSY;
-
-	if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	if (proto->tuple_to_nlattr && proto->nla_size == 0)
 		return -EINVAL;
-
+#endif
 	mutex_lock(&nf_ct_proto_mutex);
 	old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
 					lockdep_is_held(&nf_ct_proto_mutex));
@@ -226,9 +226,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
 		goto out_unlock;
 	}
 
-	if (proto->nlattr_tuple_size)
-		proto->nla_size = 3 * proto->nlattr_tuple_size();
-
 	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
 
 out_unlock:
-- 
cgit v1.2.3


From a3134d537f8209f5b149d7ed9f287047158845f0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 12 Aug 2017 00:57:03 +0200
Subject: netfilter: conntrack: remove protocol name from l3proto struct

no need to waste storage for something that is only needed
in one place and can be deduced from protocol number.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h   |  3 ---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  1 -
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  1 -
 net/netfilter/nf_conntrack_l3proto_generic.c   |  1 -
 net/netfilter/nf_conntrack_standalone.c        | 12 +++++++++++-
 5 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 6a27ffea7480..e31861e4fa6a 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -23,9 +23,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/* Protocol name */
-	const char *name;
-
 	/*
 	 * Try to fill in the third arg: nhoff is offset of l3 proto
          * hdr.  Return true if possible.
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 9fb8cb033d80..9f7ea862068c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -353,7 +353,6 @@ static void ipv4_hooks_unregister(struct net *net)
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
 	.l3proto	 = PF_INET,
-	.name		 = "ipv4",
 	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
 	.invert_tuple	 = ipv4_invert_tuple,
 	.print_tuple	 = ipv4_print_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 6b4d59fd0214..91d37fbe28de 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -348,7 +348,6 @@ static void ipv6_hooks_unregister(struct net *net)
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
 	.l3proto		= PF_INET6,
-	.name			= "ipv6",
 	.pkt_to_tuple		= ipv6_pkt_to_tuple,
 	.invert_tuple		= ipv6_invert_tuple,
 	.print_tuple		= ipv6_print_tuple,
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index cf9ace70bece..0387971582bc 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -64,7 +64,6 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
 	.l3proto	 = PF_UNSPEC,
-	.name		 = "unknown",
 	.pkt_to_tuple	 = generic_pkt_to_tuple,
 	.invert_tuple	 = generic_invert_tuple,
 	.print_tuple	 = generic_print_tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5b6c675d55b1..359d7e6a4503 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -198,6 +198,16 @@ ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
 }
 #endif
 
+static const char* l3proto_name(u16 proto)
+{
+	switch (proto) {
+	case AF_INET: return "ipv4";
+	case AF_INET6: return "ipv6";
+	}
+
+	return "unknown";
+}
+
 /* return 0 on success, 1 in case of error */
 static int ct_seq_show(struct seq_file *s, void *v)
 {
@@ -231,7 +241,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 
 	ret = -ENOSPC;
 	seq_printf(s, "%-8s %u %-8s %u %ld ",
-		   l3proto->name, nf_ct_l3num(ct),
+		   l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
 		   l4proto->name, nf_ct_protonum(ct),
 		   nf_ct_expires(ct)  / HZ);
 
-- 
cgit v1.2.3


From 09ec82f5af99d1e35614eb0844b920fc335a313d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 12 Aug 2017 00:57:04 +0200
Subject: netfilter: conntrack: remove protocol name from l4proto struct

no need to waste storage for something that is only needed
in one place and can be deduced from protocol number.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h   |  3 ---
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  1 -
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  1 -
 net/netfilter/nf_conntrack_proto.c             |  8 ++++----
 net/netfilter/nf_conntrack_proto_dccp.c        |  2 --
 net/netfilter/nf_conntrack_proto_generic.c     |  1 -
 net/netfilter/nf_conntrack_proto_gre.c         |  1 -
 net/netfilter/nf_conntrack_proto_sctp.c        |  2 --
 net/netfilter/nf_conntrack_proto_tcp.c         |  2 --
 net/netfilter/nf_conntrack_proto_udp.c         |  4 ----
 net/netfilter/nf_conntrack_standalone.c        | 17 ++++++++++++++++-
 11 files changed, 20 insertions(+), 22 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index b6e27cafb1d9..47c16bae5e00 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -108,9 +108,6 @@ struct nf_conntrack_l4proto {
 	/* Return the per-net protocol part. */
 	struct nf_proto_net *(*get_net_proto)(struct net *net);
 
-	/* Protocol name */
-	const char *name;
-
 	/* Module (if any) which this is connected to. */
 	struct module *me;
 };
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 73c591d8a9a8..fdbeb03e4600 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -362,7 +362,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
 {
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_ICMP,
-	.name			= "icmp",
 	.pkt_to_tuple		= icmp_pkt_to_tuple,
 	.invert_tuple		= icmp_invert_tuple,
 	.print_tuple		= icmp_print_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index d5f028e33f65..805ab122767a 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -367,7 +367,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
 {
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_ICMPV6,
-	.name			= "icmpv6",
 	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
 	.invert_tuple		= icmpv6_invert_tuple,
 	.print_tuple		= icmpv6_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 85104a27cc89..0ecab7163d62 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -437,8 +437,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
 	}
 	if (i != num_proto) {
 		ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
-		pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
-		       ver, l4proto[i]->name, ver);
+		pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
+		       ver, l4proto[i]->l4proto);
 		nf_ct_l4proto_unregister(l4proto, i);
 	}
 	return ret;
@@ -458,8 +458,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
 			break;
 	}
 	if (i != num_proto) {
-		pr_err("nf_conntrack_%s%d: pernet registration failed\n",
-		       l4proto[i]->name,
+		pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
+		       l4proto[i]->l4proto,
 		       l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
 		nf_ct_l4proto_pernet_unregister(net, l4proto, i);
 	}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4707d997558a..a0492184a0a8 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -880,7 +880,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
 struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
 	.l3proto		= AF_INET,
 	.l4proto		= IPPROTO_DCCP,
-	.name			= "dccp",
 	.pkt_to_tuple		= dccp_pkt_to_tuple,
 	.invert_tuple		= dccp_invert_tuple,
 	.new			= dccp_new,
@@ -916,7 +915,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
 struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
 	.l3proto		= AF_INET6,
 	.l4proto		= IPPROTO_DCCP,
-	.name			= "dccp",
 	.pkt_to_tuple		= dccp_pkt_to_tuple,
 	.invert_tuple		= dccp_invert_tuple,
 	.new			= dccp_new,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d5868bad33a7..4fe8b3312823 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -187,7 +187,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
 {
 	.l3proto		= PF_UNSPEC,
 	.l4proto		= 255,
-	.name			= "unknown",
 	.pkt_to_tuple		= generic_pkt_to_tuple,
 	.invert_tuple		= generic_invert_tuple,
 	.print_tuple		= generic_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 87bb40a3feb5..984bcfdbd4d7 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -364,7 +364,6 @@ static int gre_init_net(struct net *net, u_int16_t proto)
 static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
 	.l3proto	 = AF_INET,
 	.l4proto	 = IPPROTO_GRE,
-	.name		 = "gre",
 	.pkt_to_tuple	 = gre_pkt_to_tuple,
 	.invert_tuple	 = gre_invert_tuple,
 	.print_tuple	 = gre_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6eef29d2eec4..1d7a995ea049 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -791,7 +791,6 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
 struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_SCTP,
-	.name 			= "sctp",
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
 	.invert_tuple 		= sctp_invert_tuple,
 	.print_tuple 		= sctp_print_tuple,
@@ -828,7 +827,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
 struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.l3proto		= PF_INET6,
 	.l4proto 		= IPPROTO_SCTP,
-	.name 			= "sctp",
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
 	.invert_tuple 		= sctp_invert_tuple,
 	.print_tuple 		= sctp_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9758a7dfd83e..e3e59e3d0592 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1556,7 +1556,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
 {
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_TCP,
-	.name 			= "tcp",
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
 	.invert_tuple 		= tcp_invert_tuple,
 	.print_tuple 		= tcp_print_tuple,
@@ -1594,7 +1593,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
 {
 	.l3proto		= PF_INET6,
 	.l4proto 		= IPPROTO_TCP,
-	.name 			= "tcp",
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
 	.invert_tuple 		= tcp_invert_tuple,
 	.print_tuple 		= tcp_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index f6ebce6178ca..ec861a1169f1 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -313,7 +313,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
 {
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDP,
-	.name			= "udp",
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
@@ -347,7 +346,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
 {
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDPLITE,
-	.name			= "udplite",
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
@@ -381,7 +379,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
 {
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDP,
-	.name			= "udp",
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
@@ -415,7 +412,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
 {
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDPLITE,
-	.name			= "udplite",
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 359d7e6a4503..b28f9e93f574 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -208,6 +208,21 @@ static const char* l3proto_name(u16 proto)
 	return "unknown";
 }
 
+static const char* l4proto_name(u16 proto)
+{
+	switch (proto) {
+	case IPPROTO_ICMP: return "icmp";
+	case IPPROTO_TCP: return "tcp";
+	case IPPROTO_UDP: return "udp";
+	case IPPROTO_DCCP: return "dccp";
+	case IPPROTO_GRE: return "gre";
+	case IPPROTO_SCTP: return "sctp";
+	case IPPROTO_UDPLITE: return "udplite";
+	}
+
+	return "unknown";
+}
+
 /* return 0 on success, 1 in case of error */
 static int ct_seq_show(struct seq_file *s, void *v)
 {
@@ -242,7 +257,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	ret = -ENOSPC;
 	seq_printf(s, "%-8s %u %-8s %u %ld ",
 		   l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
-		   l4proto->name, nf_ct_protonum(ct),
+		   l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
 		   nf_ct_expires(ct)  / HZ);
 
 	if (l4proto->print_conntrack)
-- 
cgit v1.2.3


From 036c69400a34cf8f6d39c88325dd510462809e7b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 12 Aug 2017 00:57:05 +0200
Subject: netfilter: conntrack: reduce size of l4protocol trackers

can use u16 for both, shrinks size by another 8 bytes.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 47c16bae5e00..15c58dd3f701 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -92,12 +92,12 @@ struct nf_conntrack_l4proto {
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
 	struct {
-		size_t obj_size;
 		int (*nlattr_to_obj)(struct nlattr *tb[],
 				     struct net *net, void *data);
 		int (*obj_to_nlattr)(struct sk_buff *skb, const void *data);
 
-		unsigned int nlattr_max;
+		u16 obj_size;
+		u16 nlattr_max;
 		const struct nla_policy *nla_policy;
 	} ctnl_timeout;
 #endif
-- 
cgit v1.2.3


From 91950833dd5a34ac6336aa88da6d43aaeb56ac6d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 12 Aug 2017 00:57:06 +0200
Subject: netfilter: conntrack: place print_tuple in procfs part

CONFIG_NF_CONNTRACK_PROCFS is deprecated, no need to use a function
pointer in the trackers for this. Place the printf formatting in
the one place that uses it.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h   |  4 --
 include/net/netfilter/nf_conntrack_l4proto.h   |  4 --
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  8 ----
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   | 11 -----
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  8 ----
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 11 -----
 net/netfilter/nf_conntrack_l3proto_generic.c   |  6 ---
 net/netfilter/nf_conntrack_proto_dccp.c        | 10 -----
 net/netfilter/nf_conntrack_proto_generic.c     |  7 ----
 net/netfilter/nf_conntrack_proto_gre.c         | 10 -----
 net/netfilter/nf_conntrack_proto_sctp.c        | 11 -----
 net/netfilter/nf_conntrack_proto_tcp.c         | 11 -----
 net/netfilter/nf_conntrack_proto_udp.c         | 13 ------
 net/netfilter/nf_conntrack_standalone.c        | 58 +++++++++++++++++++++++++-
 14 files changed, 56 insertions(+), 116 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index e31861e4fa6a..dabb53b0913c 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -37,10 +37,6 @@ struct nf_conntrack_l3proto {
 	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
 			     const struct nf_conntrack_tuple *orig);
 
-	/* Print out the per-protocol part of the tuple. */
-	void (*print_tuple)(struct seq_file *s,
-			    const struct nf_conntrack_tuple *);
-
 	/*
 	 * Called before tracking. 
 	 *	*dataoff: offset of protocol header (TCP, UDP,...) in skb
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 15c58dd3f701..7e8da04a5eb6 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -61,10 +61,6 @@ struct nf_conntrack_l4proto {
 	/* called by gc worker if table is full */
 	bool (*can_early_drop)(const struct nf_conn *ct);
 
-	/* Print out the per-protocol part of the tuple. Return like seq_* */
-	void (*print_tuple)(struct seq_file *s,
-			    const struct nf_conntrack_tuple *);
-
 	/* Print out the private part of the conntrack. */
 	void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 9f7ea862068c..fe374da4bc13 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,13 +63,6 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-static void ipv4_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "src=%pI4 dst=%pI4 ",
-		   &tuple->src.u3.ip, &tuple->dst.u3.ip);
-}
-
 static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			    unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -355,7 +348,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
 	.l3proto	 = PF_INET,
 	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
 	.invert_tuple	 = ipv4_invert_tuple,
-	.print_tuple	 = ipv4_print_tuple,
 	.get_l4proto	 = ipv4_get_l4proto,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr = ipv4_tuple_to_nlattr,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index fdbeb03e4600..434b4e20f6db 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -71,16 +71,6 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void icmp_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "type=%u code=%u id=%u ",
-		   tuple->dst.u.icmp.type,
-		   tuple->dst.u.icmp.code,
-		   ntohs(tuple->src.u.icmp.id));
-}
-
 static unsigned int *icmp_get_timeouts(struct net *net)
 {
 	return &icmp_pernet(net)->timeout;
@@ -364,7 +354,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
 	.l4proto		= IPPROTO_ICMP,
 	.pkt_to_tuple		= icmp_pkt_to_tuple,
 	.invert_tuple		= icmp_invert_tuple,
-	.print_tuple		= icmp_print_tuple,
 	.packet			= icmp_packet,
 	.get_timeouts		= icmp_get_timeouts,
 	.new			= icmp_new,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 91d37fbe28de..fe01dc953c56 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -67,13 +67,6 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-static void ipv6_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "src=%pI6 dst=%pI6 ",
-		   tuple->src.u3.ip6, tuple->dst.u3.ip6);
-}
-
 static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			    unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -350,7 +343,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
 	.l3proto		= PF_INET6,
 	.pkt_to_tuple		= ipv6_pkt_to_tuple,
 	.invert_tuple		= ipv6_invert_tuple,
-	.print_tuple		= ipv6_print_tuple,
 	.get_l4proto		= ipv6_get_l4proto,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= ipv6_tuple_to_nlattr,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 805ab122767a..808f63e2e1ff 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -84,16 +84,6 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void icmpv6_print_tuple(struct seq_file *s,
-			      const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "type=%u code=%u id=%u ",
-		   tuple->dst.u.icmp.type,
-		   tuple->dst.u.icmp.code,
-		   ntohs(tuple->src.u.icmp.id));
-}
-
 static unsigned int *icmpv6_get_timeouts(struct net *net)
 {
 	return &icmpv6_pernet(net)->timeout;
@@ -369,7 +359,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
 	.l4proto		= IPPROTO_ICMPV6,
 	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
 	.invert_tuple		= icmpv6_invert_tuple,
-	.print_tuple		= icmpv6_print_tuple,
 	.packet			= icmpv6_packet,
 	.get_timeouts		= icmpv6_get_timeouts,
 	.new			= icmpv6_new,
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index 0387971582bc..397e6911214f 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -49,11 +49,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-static void generic_print_tuple(struct seq_file *s,
-				const struct nf_conntrack_tuple *tuple)
-{
-}
-
 static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			       unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -66,7 +61,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
 	.l3proto	 = PF_UNSPEC,
 	.pkt_to_tuple	 = generic_pkt_to_tuple,
 	.invert_tuple	 = generic_invert_tuple,
-	.print_tuple	 = generic_print_tuple,
 	.get_l4proto	 = generic_get_l4proto,
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index a0492184a0a8..d2df49ac390a 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -623,14 +623,6 @@ static bool dccp_can_early_drop(const struct nf_conn *ct)
 	return false;
 }
 
-static void dccp_print_tuple(struct seq_file *s,
-			     const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "sport=%hu dport=%hu ",
-		   ntohs(tuple->src.u.dccp.port),
-		   ntohs(tuple->dst.u.dccp.port));
-}
-
 static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
@@ -887,7 +879,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
 	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
-	.print_tuple		= dccp_print_tuple,
 	.print_conntrack	= dccp_print_conntrack,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.to_nlattr		= dccp_to_nlattr,
@@ -922,7 +913,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
 	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
-	.print_tuple		= dccp_print_tuple,
 	.print_conntrack	= dccp_print_conntrack,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.to_nlattr		= dccp_to_nlattr,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 4fe8b3312823..2bc3d0c1a5bf 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -62,12 +62,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void generic_print_tuple(struct seq_file *s,
-				const struct nf_conntrack_tuple *tuple)
-{
-}
-
 static unsigned int *generic_get_timeouts(struct net *net)
 {
 	return &(generic_pernet(net)->timeout);
@@ -189,7 +183,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
 	.l4proto		= 255,
 	.pkt_to_tuple		= generic_pkt_to_tuple,
 	.invert_tuple		= generic_invert_tuple,
-	.print_tuple		= generic_print_tuple,
 	.packet			= generic_packet,
 	.get_timeouts		= generic_get_timeouts,
 	.new			= generic_new,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 984bcfdbd4d7..cd28095dd7a4 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -224,15 +224,6 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 	return true;
 }
 
-/* print gre part of tuple */
-static void gre_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "srckey=0x%x dstkey=0x%x ",
-		   ntohs(tuple->src.u.gre.key),
-		   ntohs(tuple->dst.u.gre.key));
-}
-
 /* print private data for conntrack */
 static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
@@ -366,7 +357,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
 	.l4proto	 = IPPROTO_GRE,
 	.pkt_to_tuple	 = gre_pkt_to_tuple,
 	.invert_tuple	 = gre_invert_tuple,
-	.print_tuple	 = gre_print_tuple,
 	.print_conntrack = gre_print_conntrack,
 	.get_timeouts    = gre_get_timeouts,
 	.packet		 = gre_packet,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 1d7a995ea049..da83b401be17 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -174,15 +174,6 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void sctp_print_tuple(struct seq_file *s,
-			     const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "sport=%hu dport=%hu ",
-		   ntohs(tuple->src.u.sctp.port),
-		   ntohs(tuple->dst.u.sctp.port));
-}
-
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
@@ -793,7 +784,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.l4proto 		= IPPROTO_SCTP,
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
 	.invert_tuple 		= sctp_invert_tuple,
-	.print_tuple 		= sctp_print_tuple,
 	.print_conntrack	= sctp_print_conntrack,
 	.packet 		= sctp_packet,
 	.get_timeouts		= sctp_get_timeouts,
@@ -829,7 +819,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.l4proto 		= IPPROTO_SCTP,
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
 	.invert_tuple 		= sctp_invert_tuple,
-	.print_tuple 		= sctp_print_tuple,
 	.print_conntrack	= sctp_print_conntrack,
 	.packet 		= sctp_packet,
 	.get_timeouts		= sctp_get_timeouts,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index e3e59e3d0592..c868b36b8945 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -301,15 +301,6 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void tcp_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "sport=%hu dport=%hu ",
-		   ntohs(tuple->src.u.tcp.port),
-		   ntohs(tuple->dst.u.tcp.port));
-}
-
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
@@ -1558,7 +1549,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
 	.l4proto 		= IPPROTO_TCP,
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
 	.invert_tuple 		= tcp_invert_tuple,
-	.print_tuple 		= tcp_print_tuple,
 	.print_conntrack 	= tcp_print_conntrack,
 	.packet 		= tcp_packet,
 	.get_timeouts		= tcp_get_timeouts,
@@ -1595,7 +1585,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
 	.l4proto 		= IPPROTO_TCP,
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
 	.invert_tuple 		= tcp_invert_tuple,
-	.print_tuple 		= tcp_print_tuple,
 	.print_conntrack 	= tcp_print_conntrack,
 	.packet 		= tcp_packet,
 	.get_timeouts		= tcp_get_timeouts,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index ec861a1169f1..dcf3030d2226 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -63,15 +63,6 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void udp_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "sport=%hu dport=%hu ",
-		   ntohs(tuple->src.u.udp.port),
-		   ntohs(tuple->dst.u.udp.port));
-}
-
 static unsigned int *udp_get_timeouts(struct net *net)
 {
 	return udp_pernet(net)->timeouts;
@@ -316,7 +307,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
@@ -349,7 +339,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
@@ -382,7 +371,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
@@ -415,7 +403,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b28f9e93f574..9eb85858d764 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -41,8 +41,62 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
             const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *l4proto)
 {
-	l3proto->print_tuple(s, tuple);
-	l4proto->print_tuple(s, tuple);
+	switch (l3proto->l3proto) {
+	case NFPROTO_IPV4:
+		seq_printf(s, "src=%pI4 dst=%pI4 ",
+			   &tuple->src.u3.ip, &tuple->dst.u3.ip);
+		break;
+	case NFPROTO_IPV6:
+		seq_printf(s, "src=%pI6 dst=%pI6 ",
+			   tuple->src.u3.ip6, tuple->dst.u3.ip6);
+		break;
+	default:
+		break;
+	}
+
+	switch (l4proto->l4proto) {
+	case IPPROTO_ICMP:
+		seq_printf(s, "type=%u code=%u id=%u ",
+			   tuple->dst.u.icmp.type,
+			   tuple->dst.u.icmp.code,
+			   ntohs(tuple->src.u.icmp.id));
+		break;
+	case IPPROTO_TCP:
+		seq_printf(s, "sport=%hu dport=%hu ",
+			   ntohs(tuple->src.u.tcp.port),
+			   ntohs(tuple->dst.u.tcp.port));
+		break;
+	case IPPROTO_UDPLITE: /* fallthrough */
+	case IPPROTO_UDP:
+		seq_printf(s, "sport=%hu dport=%hu ",
+			   ntohs(tuple->src.u.udp.port),
+			   ntohs(tuple->dst.u.udp.port));
+
+		break;
+	case IPPROTO_DCCP:
+		seq_printf(s, "sport=%hu dport=%hu ",
+			   ntohs(tuple->src.u.dccp.port),
+			   ntohs(tuple->dst.u.dccp.port));
+		break;
+	case IPPROTO_SCTP:
+		seq_printf(s, "sport=%hu dport=%hu ",
+			   ntohs(tuple->src.u.sctp.port),
+			   ntohs(tuple->dst.u.sctp.port));
+		break;
+	case IPPROTO_ICMPV6:
+		seq_printf(s, "type=%u code=%u id=%u ",
+			   tuple->dst.u.icmp.type,
+			   tuple->dst.u.icmp.code,
+			   ntohs(tuple->src.u.icmp.id));
+		break;
+	case IPPROTO_GRE:
+		seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+			   ntohs(tuple->src.u.gre.key),
+			   ntohs(tuple->dst.u.gre.key));
+		break;
+	default:
+		break;
+	}
 }
 EXPORT_SYMBOL_GPL(print_tuple);
 
-- 
cgit v1.2.3


From ea48cc83cf612fddb4e8868369348b9f936cfedb Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 12 Aug 2017 00:57:07 +0200
Subject: netfilter: conntrack: print_conntrack only needed if
 CONFIG_NF_CONNTRACK_PROCFS

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 7 ++++---
 net/netfilter/nf_conntrack_proto_dccp.c      | 6 ++++++
 net/netfilter/nf_conntrack_proto_gre.c       | 4 ++++
 net/netfilter/nf_conntrack_proto_sctp.c      | 6 ++++++
 net/netfilter/nf_conntrack_proto_tcp.c       | 6 ++++++
 5 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7e8da04a5eb6..4976ef92dc78 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -61,9 +61,6 @@ struct nf_conntrack_l4proto {
 	/* called by gc worker if table is full */
 	bool (*can_early_drop)(const struct nf_conn *ct);
 
-	/* Print out the private part of the conntrack. */
-	void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
-
 	/* Return the array of timeouts for this protocol. */
 	unsigned int *(*get_timeouts)(struct net *net);
 
@@ -96,6 +93,10 @@ struct nf_conntrack_l4proto {
 		u16 nlattr_max;
 		const struct nla_policy *nla_policy;
 	} ctnl_timeout;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+	/* Print out the private part of the conntrack. */
+	void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
 #endif
 	unsigned int	*net_id;
 	/* Init l4proto pernet data */
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index d2df49ac390a..188347571fc7 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -623,10 +623,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct)
 	return false;
 }
 
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
 }
+#endif
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
@@ -879,7 +881,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
 	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= dccp_print_conntrack,
+#endif
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.to_nlattr		= dccp_to_nlattr,
 	.nlattr_size		= dccp_nlattr_size,
@@ -913,7 +917,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
 	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= dccp_print_conntrack,
+#endif
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.to_nlattr		= dccp_to_nlattr,
 	.nlattr_size		= dccp_nlattr_size,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index cd28095dd7a4..c0e3a23ac23a 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -224,6 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 	return true;
 }
 
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* print private data for conntrack */
 static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
@@ -231,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 		   (ct->proto.gre.timeout / HZ),
 		   (ct->proto.gre.stream_timeout / HZ));
 }
+#endif
 
 static unsigned int *gre_get_timeouts(struct net *net)
 {
@@ -357,7 +359,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
 	.l4proto	 = IPPROTO_GRE,
 	.pkt_to_tuple	 = gre_pkt_to_tuple,
 	.invert_tuple	 = gre_invert_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack = gre_print_conntrack,
+#endif
 	.get_timeouts    = gre_get_timeouts,
 	.packet		 = gre_packet,
 	.new		 = gre_new,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index da83b401be17..890b5c73368d 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -174,11 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
 }
+#endif
 
 #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)	\
 for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0;	\
@@ -784,7 +786,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.l4proto 		= IPPROTO_SCTP,
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
 	.invert_tuple 		= sctp_invert_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
+#endif
 	.packet 		= sctp_packet,
 	.get_timeouts		= sctp_get_timeouts,
 	.new 			= sctp_new,
@@ -819,7 +823,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.l4proto 		= IPPROTO_SCTP,
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
 	.invert_tuple 		= sctp_invert_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
+#endif
 	.packet 		= sctp_packet,
 	.get_timeouts		= sctp_get_timeouts,
 	.new 			= sctp_new,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index c868b36b8945..33c52d9ab2f5 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -301,11 +301,13 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
 }
+#endif
 
 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 {
@@ -1549,7 +1551,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
 	.l4proto 		= IPPROTO_TCP,
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
 	.invert_tuple 		= tcp_invert_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
+#endif
 	.packet 		= tcp_packet,
 	.get_timeouts		= tcp_get_timeouts,
 	.new 			= tcp_new,
@@ -1585,7 +1589,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
 	.l4proto 		= IPPROTO_TCP,
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
 	.invert_tuple 		= tcp_invert_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
+#endif
 	.packet 		= tcp_packet,
 	.get_timeouts		= tcp_get_timeouts,
 	.new 			= tcp_new,
-- 
cgit v1.2.3


From b3480fe059ac9121b5714205b4ddae14b59ef4be Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 12 Aug 2017 00:57:08 +0200
Subject: netfilter: conntrack: make protocol tracker pointers const

Doesn't change generated code, but will make it easier to eventually
make the actual trackers themselvers const.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h |  6 +++---
 include/net/netfilter/nf_conntrack_l4proto.h |  4 ++--
 include/net/netfilter/nf_conntrack_timeout.h |  2 +-
 net/netfilter/nf_conntrack_core.c            | 12 ++++++------
 net/netfilter/nf_conntrack_netlink.c         | 22 +++++++++++-----------
 net/netfilter/nf_conntrack_proto.c           | 20 ++++++++++----------
 net/netfilter/nfnetlink_cttimeout.c          | 14 +++++++-------
 net/netfilter/xt_CT.c                        |  2 +-
 net/openvswitch/conntrack.c                  |  4 ++--
 9 files changed, 43 insertions(+), 43 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index dabb53b0913c..6269deecbee7 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -64,10 +64,10 @@ struct nf_conntrack_l3proto {
 extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
 
 /* Protocol global registration. */
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto);
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto);
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
 
-struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
+const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
 
 /* Existing built-in protocols */
 extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 4976ef92dc78..d4933d56809d 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -114,10 +114,10 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 
 #define MAX_NF_CT_PROTO 256
 
-struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
+const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
 						  u_int8_t l4proto);
 
-struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
 						    u_int8_t l4proto);
 void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
 
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index b222957062b5..483d104fa254 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -16,7 +16,7 @@ struct ctnl_timeout {
 	refcount_t		refcnt;
 	char			name[CTNL_TIMEOUT_NAME_MAX];
 	__u16			l3num;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	char			data[0];
 };
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f2f00eaf217d..c23df7c9cd59 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -250,8 +250,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 		       u_int16_t l3num,
 		       struct net *net, struct nf_conntrack_tuple *tuple)
 {
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	unsigned int protoff;
 	u_int8_t protonum;
 	int ret;
@@ -400,7 +400,7 @@ static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 
 	pr_debug("destroy_conntrack(%p)\n", ct);
 	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
@@ -694,7 +694,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
 {
 	/* This is the conntrack entry already in hashes that won race. */
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	if (l4proto->allow_clash &&
@@ -1344,10 +1344,10 @@ unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		struct sk_buff *skb)
 {
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conn *ct, *tmpl;
 	enum ip_conntrack_info ctinfo;
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
 	unsigned int *timeouts;
 	unsigned int dataoff;
 	u_int8_t protonum;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b59a453a0fd8..de4053d84364 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -109,9 +109,9 @@ nla_put_failure:
 static int ctnetlink_dump_tuples(struct sk_buff *skb,
 				 const struct nf_conntrack_tuple *tuple)
 {
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	int ret;
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
 
 	rcu_read_lock();
 	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -163,7 +163,7 @@ nla_put_failure:
 
 static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
 {
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nlattr *nest_proto;
 	int ret;
 
@@ -535,9 +535,9 @@ nla_put_failure:
 
 static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
 {
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
-	size_t len = 0;
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
+	size_t len;
 
 	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
 	len = l3proto->nla_size;
@@ -936,8 +936,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
 static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
 				       struct nf_conntrack_tuple *tuple)
 {
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nlattr *tb[CTA_PROTO_MAX+1];
-	struct nf_conntrack_l4proto *l4proto;
 	int ret = 0;
 
 	ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
@@ -1580,8 +1580,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
 				      const struct nlattr * const cda[])
 {
 	const struct nlattr *attr = cda[CTA_PROTOINFO];
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nlattr *tb[CTA_PROTOINFO_MAX+1];
-	struct nf_conntrack_l4proto *l4proto;
 	int err = 0;
 
 	err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
@@ -2475,11 +2475,11 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
 				   const struct nf_conntrack_tuple *tuple,
 				   const struct nf_conntrack_tuple_mask *mask)
 {
-	int ret;
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple m;
 	struct nlattr *nest_parms;
+	int ret;
 
 	memset(&m, 0xFF, sizeof(m));
 	memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 0ecab7163d62..b3e489c859ec 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -65,7 +65,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
 }
 #endif
 
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
 __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
 {
 	if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
@@ -77,7 +77,7 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
 
 /* this is guaranteed to always return a valid protocol helper, since
  * it falls back to generic_protocol */
-struct nf_conntrack_l3proto *
+const struct nf_conntrack_l3proto *
 nf_ct_l3proto_find_get(u_int16_t l3proto)
 {
 	struct nf_conntrack_l3proto *p;
@@ -95,8 +95,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
 int
 nf_ct_l3proto_try_module_get(unsigned short l3proto)
 {
+	const struct nf_conntrack_l3proto *p;
 	int ret;
-	struct nf_conntrack_l3proto *p;
 
 retry:	p = nf_ct_l3proto_find_get(l3proto);
 	if (p == &nf_conntrack_l3proto_generic) {
@@ -173,10 +173,10 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
 }
 EXPORT_SYMBOL_GPL(nf_ct_netns_put);
 
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
 nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
 {
-	struct nf_conntrack_l4proto *p;
+	const struct nf_conntrack_l4proto *p;
 
 	rcu_read_lock();
 	p = __nf_ct_l4proto_find(l3num, l4num);
@@ -196,18 +196,18 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
 
 static int kill_l3proto(struct nf_conn *i, void *data)
 {
-	return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto;
+	return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
 }
 
 static int kill_l4proto(struct nf_conn *i, void *data)
 {
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	l4proto = data;
 	return nf_ct_protonum(i) == l4proto->l4proto &&
 	       nf_ct_l3num(i) == l4proto->l3proto;
 }
 
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
 	struct nf_conntrack_l3proto *old;
@@ -235,7 +235,7 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
 
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
 {
 	BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
 
@@ -249,7 +249,7 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 
 	synchronize_rcu();
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_destroy(kill_l3proto, proto);
+	nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
 
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index fcabccc99f0d..32b1c0b44e79 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -75,7 +75,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 {
 	__u16 l3num;
 	__u8 l4num;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct ctnl_timeout *timeout, *matching = NULL;
 	char *name;
 	int ret;
@@ -159,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
-	struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
+	const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -364,10 +364,10 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 				 const struct nlattr * const cda[],
 				 struct netlink_ext_ack *extack)
 {
+	const struct nf_conntrack_l4proto *l4proto;
+	unsigned int *timeouts;
 	__u16 l3num;
 	__u8 l4num;
-	struct nf_conntrack_l4proto *l4proto;
-	unsigned int *timeouts;
 	int ret;
 
 	if (!cda[CTA_TIMEOUT_L3PROTO] ||
@@ -454,11 +454,11 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 				 const struct nlattr * const cda[],
 				 struct netlink_ext_ack *extack)
 {
-	__u16 l3num;
-	__u8 l4num;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct sk_buff *skb2;
 	int ret, err;
+	__u16 l3num;
+	__u8 l4num;
 
 	if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
 		return -EINVAL;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 623ef37de886..5a152e2acfd5 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -121,9 +121,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
 {
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct ctnl_timeout *timeout;
 	struct nf_conn_timeout *timeout_ext;
-	struct nf_conntrack_l4proto *l4proto;
 	int ret = 0;
 	u8 proto;
 
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e3c4c6c3fef7..283363534647 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -579,8 +579,8 @@ static struct nf_conn *
 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 		     u8 l3num, struct sk_buff *skb, bool natted)
 {
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
-- 
cgit v1.2.3


From 22b6722bfa591ba03d6a0c5521b600d4ab2d9a27 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jkbs@redhat.com>
Date: Wed, 23 Aug 2017 09:55:41 +0200
Subject: ipv6: Add sysctl for per namespace flow label reflection

Reflecting IPv6 Flow Label at server nodes is useful in environments
that employ multipath routing to load balance the requests. As "IPv6
Flow Label Reflection" standard draft [1] points out - ICMPv6 PTB error
messages generated in response to a downstream packets from the server
can be routed by a load balancer back to the original server without
looking at transport headers, if the server applies the flow label
reflection. This enables the Path MTU Discovery past the ECMP router in
load-balance or anycast environments where each server node is reachable
by only one path.

Introduce a sysctl to enable flow label reflection per net namespace for
all newly created sockets. Same could be earlier achieved only per
socket by setting the IPV6_FL_F_REFLECT flag for the IPV6_FLOWLABEL_MGR
socket option.

[1] https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01

Signed-off-by: Jakub Sitnicki <jkbs@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 9 +++++++++
 include/net/netns/ipv6.h               | 1 +
 net/ipv6/af_inet6.c                    | 1 +
 net/ipv6/sysctl_net_ipv6.c             | 8 ++++++++
 4 files changed, 19 insertions(+)

(limited to 'include/net')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 84c9b8cee780..6b0bc0f71534 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1350,6 +1350,15 @@ flowlabel_state_ranges - BOOLEAN
 	FALSE: disabled
 	Default: true
 
+flowlabel_reflect - BOOLEAN
+	Automatically reflect the flow label. Needed for Path MTU
+	Discovery to work with Equal Cost Multipath Routing in anycast
+	environments. See RFC 7690 and:
+	https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01
+	TRUE: enabled
+	FALSE: disabled
+	Default: FALSE
+
 anycast_src_echo_reply - BOOLEAN
 	Controls the use of anycast addresses as source addresses for ICMPv6
 	echo reply
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 0e50bf3ed097..2544f9760a42 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -36,6 +36,7 @@ struct netns_sysctl_ipv6 {
 	int idgen_retries;
 	int idgen_delay;
 	int flowlabel_state_ranges;
+	int flowlabel_reflect;
 };
 
 struct netns_ipv6 {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3b58ee709f33..fe5262fd6aa5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -211,6 +211,7 @@ lookup_protocol:
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
 	np->autoflowlabel = ip6_default_np_autolabel(net);
+	np->repflow	= net->ipv6.sysctl.flowlabel_reflect;
 	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
 
 	/* Init the ipv4 part of the socket since we can have sockets
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 69c50e737c54..6fbf8ae5e52c 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -90,6 +90,13 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "flowlabel_reflect",
+		.data		= &init_net.ipv6.sysctl.flowlabel_reflect,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
@@ -149,6 +156,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
 	ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
 	ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
+	ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect;
 
 	ipv6_route_table = ipv6_route_sysctl_init(net);
 	if (!ipv6_route_table)
-- 
cgit v1.2.3


From 790c6056686cc4dd5b149b330bbd5ae208d4d721 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 24 Aug 2017 18:10:46 -0700
Subject: devlink: Fix devlink_dpipe_table_register() stub signature.

One too many arguments compared to the non-stub version.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Fixes: ffd3cdccf214 ("devlink: Add support for dynamic table size")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 047a0c54f652..aaf7178127a2 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -402,8 +402,7 @@ static inline int
 devlink_dpipe_table_register(struct devlink *devlink,
 			     const char *table_name,
 			     struct devlink_dpipe_table_ops *table_ops,
-			     void *priv, u64 size,
-			     bool counter_control_extern)
+			     void *priv, bool counter_control_extern)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 29825717123fb9cfb9e709327d565c2f2fa89903 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jkbs@redhat.com>
Date: Wed, 23 Aug 2017 09:58:28 +0200
Subject: net: Extend struct flowi6 with multipath hash

Allow for functions that fill out the IPv6 flow info to also pass a hash
computed over the skb contents. The hash value will drive the multipath
routing decisions.

This is intended for special treatment of ICMPv6 errors, where we would
like to make a routing decision based on the flow identifying the
offending IPv6 datagram that triggered the error, rather than the flow
of the ICMP error itself.

Signed-off-by: Jakub Sitnicki <jkbs@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index f3dc61b29bb5..eb60cee30b44 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -149,6 +149,7 @@ struct flowi6 {
 #define fl6_ipsec_spi		uli.spi
 #define fl6_mh_type		uli.mht.type
 #define fl6_gre_key		uli.gre_key
+	__u32			mp_hash;
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 struct flowidn {
-- 
cgit v1.2.3


From 23aebdacb05dab9efdf22b9e0413491cbd5f128f Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jkbs@redhat.com>
Date: Wed, 23 Aug 2017 09:58:29 +0200
Subject: ipv6: Compute multipath hash for ICMP errors from offending packet

When forwarding or sending out an ICMPv6 error, look at the embedded
packet that triggered the error and compute a flow hash over its
headers.

This let's us route the ICMP error together with the flow it belongs to
when multipath (ECMP) routing is in use, which in turn makes Path MTU
Discovery work in ECMP load-balanced or anycast setups (RFC 7690).

Granted, end-hosts behind the ECMP router (aka servers) need to reflect
the IPv6 Flow Label for PMTUD to work.

The code is organized to be in parallel with ipv4 stack:

  ip_multipath_l3_keys -> ip6_multipath_l3_keys
  fib_multipath_hash   -> rt6_multipath_hash

Signed-off-by: Jakub Sitnicki <jkbs@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |  1 +
 net/ipv6/icmp.c         |  1 +
 net/ipv6/route.c        | 50 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 907d39a42f6b..882bc3c7ccde 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -115,6 +115,7 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 			    const struct in6_addr *saddr, int oif, int flags);
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4f82830fc068..dd7608cf1d72 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -519,6 +519,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
+	fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9b02064c3335..6c4dd5796a31 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1214,6 +1214,54 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
 }
 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
+static void ip6_multipath_l3_keys(const struct sk_buff *skb,
+				  struct flow_keys *keys)
+{
+	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+	const struct ipv6hdr *key_iph = outer_iph;
+	const struct ipv6hdr *inner_iph;
+	const struct icmp6hdr *icmph;
+	struct ipv6hdr _inner_iph;
+
+	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
+		goto out;
+
+	icmph = icmp6_hdr(skb);
+	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
+	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
+	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
+	    icmph->icmp6_type != ICMPV6_PARAMPROB)
+		goto out;
+
+	inner_iph = skb_header_pointer(skb,
+				       skb_transport_offset(skb) + sizeof(*icmph),
+				       sizeof(_inner_iph), &_inner_iph);
+	if (!inner_iph)
+		goto out;
+
+	key_iph = inner_iph;
+out:
+	memset(keys, 0, sizeof(*keys));
+	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	keys->addrs.v6addrs.src = key_iph->saddr;
+	keys->addrs.v6addrs.dst = key_iph->daddr;
+	keys->tags.flow_label = ip6_flowinfo(key_iph);
+	keys->basic.ip_proto = key_iph->nexthdr;
+}
+
+/* if skb is set it will be used and fl6 can be NULL */
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+{
+	struct flow_keys hash_keys;
+
+	if (skb) {
+		ip6_multipath_l3_keys(skb, &hash_keys);
+		return flow_hash_from_keys(&hash_keys);
+	}
+
+	return get_hash_from_flowi6(fl6);
+}
+
 void ip6_route_input(struct sk_buff *skb)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -1232,6 +1280,8 @@ void ip6_route_input(struct sk_buff *skb)
 	tun_info = skb_tunnel_info(skb);
 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
+		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
 	skb_dst_drop(skb);
 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 }
-- 
cgit v1.2.3


From 3fd87127073292538047adf1c9c757e9cab0dd56 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 24 Aug 2017 14:38:51 -0700
Subject: strparser: initialize all callbacks

commit bbb03029a899 ("strparser: Generalize strparser") added more
function pointers to 'struct strp_callbacks'; however, kcm_attach() was
not updated to initialize them.  This could cause the ->lock() and/or
->unlock() function pointers to be set to garbage values, causing a
crash in strp_work().

Fix the bug by moving the callback structs into static memory, so
unspecified members are zeroed.  Also constify them while we're at it.

This bug was found by syzkaller, which encountered the following splat:

    IP: 0x55
    PGD 3b1ca067
    P4D 3b1ca067
    PUD 3b12f067
    PMD 0

    Oops: 0010 [#1] SMP KASAN
    Dumping ftrace buffer:
       (ftrace buffer empty)
    Modules linked in:
    CPU: 2 PID: 1194 Comm: kworker/u8:1 Not tainted 4.13.0-rc4-next-20170811 #2
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
    Workqueue: kstrp strp_work
    task: ffff88006bb0e480 task.stack: ffff88006bb10000
    RIP: 0010:0x55
    RSP: 0018:ffff88006bb17540 EFLAGS: 00010246
    RAX: dffffc0000000000 RBX: ffff88006ce4bd60 RCX: 0000000000000000
    RDX: 1ffff1000d9c97bd RSI: 0000000000000000 RDI: ffff88006ce4bc48
    RBP: ffff88006bb17558 R08: ffffffff81467ab2 R09: 0000000000000000
    R10: ffff88006bb17438 R11: ffff88006bb17940 R12: ffff88006ce4bc48
    R13: ffff88003c683018 R14: ffff88006bb17980 R15: ffff88003c683000
    FS:  0000000000000000(0000) GS:ffff88006de00000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 0000000000000055 CR3: 000000003c145000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    Call Trace:
     process_one_work+0xbf3/0x1bc0 kernel/workqueue.c:2098
     worker_thread+0x223/0x1860 kernel/workqueue.c:2233
     kthread+0x35e/0x430 kernel/kthread.c:231
     ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431
    Code:  Bad RIP value.
    RIP: 0x55 RSP: ffff88006bb17540
    CR2: 0000000000000055
    ---[ end trace f0e4920047069cee ]---

Here is a C reproducer (requires CONFIG_BPF_SYSCALL=y and
CONFIG_AF_KCM=y):

    #include <linux/bpf.h>
    #include <linux/kcm.h>
    #include <linux/types.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static const struct bpf_insn bpf_insns[3] = {
        { .code = 0xb7 }, /* BPF_MOV64_IMM(0, 0) */
        { .code = 0x95 }, /* BPF_EXIT_INSN() */
    };

    static const union bpf_attr bpf_attr = {
        .prog_type = 1,
        .insn_cnt = 2,
        .insns = (uintptr_t)&bpf_insns,
        .license = (uintptr_t)"",
    };

    int main(void)
    {
        int bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD,
                             &bpf_attr, sizeof(bpf_attr));
        int inet_fd = socket(AF_INET, SOCK_STREAM, 0);
        int kcm_fd = socket(AF_KCM, SOCK_DGRAM, 0);

        ioctl(kcm_fd, SIOCKCMATTACH,
              &(struct kcm_attach) { .fd = inet_fd, .bpf_fd = bpf_fd });
    }

Fixes: bbb03029a899 ("strparser: Generalize strparser")
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Tom Herbert <tom@quantonium.net>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/strparser.txt |  2 +-
 include/net/strparser.h                |  2 +-
 kernel/bpf/sockmap.c                   | 10 +++++-----
 net/kcm/kcmsock.c                      | 11 +++++------
 net/strparser/strparser.c              |  2 +-
 5 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include/net')

diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt
index fe01302471ae..13081b3decef 100644
--- a/Documentation/networking/strparser.txt
+++ b/Documentation/networking/strparser.txt
@@ -35,7 +35,7 @@ Functions
 =========
 
 strp_init(struct strparser *strp, struct sock *sk,
-	  struct strp_callbacks *cb)
+	  const struct strp_callbacks *cb)
 
      Called to initialize a stream parser. strp is a struct of type
      strparser that is allocated by the upper layer. sk is the TCP
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 4fe966a0ad92..7dc131d62ad5 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -138,7 +138,7 @@ void strp_done(struct strparser *strp);
 void strp_stop(struct strparser *strp);
 void strp_check_rcv(struct strparser *strp);
 int strp_init(struct strparser *strp, struct sock *sk,
-	      struct strp_callbacks *cb);
+	      const struct strp_callbacks *cb);
 void strp_data_ready(struct strparser *strp);
 int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
 		 unsigned int orig_offset, size_t orig_len,
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 78b2bb9370ac..617c239590c2 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -368,12 +368,12 @@ static int smap_read_sock_done(struct strparser *strp, int err)
 static int smap_init_sock(struct smap_psock *psock,
 			  struct sock *sk)
 {
-	struct strp_callbacks cb;
+	static const struct strp_callbacks cb = {
+		.rcv_msg = smap_read_sock_strparser,
+		.parse_msg = smap_parse_func_strparser,
+		.read_sock_done = smap_read_sock_done,
+	};
 
-	memset(&cb, 0, sizeof(cb));
-	cb.rcv_msg = smap_read_sock_strparser;
-	cb.parse_msg = smap_parse_func_strparser;
-	cb.read_sock_done = smap_read_sock_done;
 	return strp_init(&psock->strp, sk, &cb);
 }
 
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 88ce73288247..48e993b2dbcf 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1376,7 +1376,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 	struct kcm_psock *psock = NULL, *tpsock;
 	struct list_head *head;
 	int index = 0;
-	struct strp_callbacks cb;
+	static const struct strp_callbacks cb = {
+		.rcv_msg = kcm_rcv_strparser,
+		.parse_msg = kcm_parse_func_strparser,
+		.read_sock_done = kcm_read_sock_done,
+	};
 	int err;
 
 	csk = csock->sk;
@@ -1391,11 +1395,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 	psock->sk = csk;
 	psock->bpf_prog = prog;
 
-	cb.rcv_msg = kcm_rcv_strparser;
-	cb.abort_parser = NULL;
-	cb.parse_msg = kcm_parse_func_strparser;
-	cb.read_sock_done = kcm_read_sock_done;
-
 	err = strp_init(&psock->strp, csk, &cb);
 	if (err) {
 		kmem_cache_free(kcm_psockp, psock);
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 434aa6637a52..d4ea46a5f233 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -472,7 +472,7 @@ static void strp_sock_unlock(struct strparser *strp)
 }
 
 int strp_init(struct strparser *strp, struct sock *sk,
-	      struct strp_callbacks *cb)
+	      const struct strp_callbacks *cb)
 {
 
 	if (!cb || !cb->rcv_msg || !cb->parse_msg)
-- 
cgit v1.2.3


From 32d99d0b670299720dd0db92a974c9612c230889 Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Fri, 25 Aug 2017 09:56:44 +0200
Subject: ipv6: sr: add support for ip4ip6 encapsulation

This patch enables the SRv6 encapsulation mode to carry an IPv4 payload.
All the infrastructure was already present, I just had to add a parameter
to seg6_do_srh_encap() to specify the inner packet protocol, and perform
some additional checks.

Usage example:
ip route add 1.2.3.4 encap seg6 mode encap segs fc00::1,fc00::2 dev eth0

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/seg6.h       |  3 ++-
 net/ipv6/seg6_iptunnel.c | 47 +++++++++++++++++++++++++++++++++++++----------
 net/ipv6/seg6_local.c    |  2 +-
 3 files changed, 40 insertions(+), 12 deletions(-)

(limited to 'include/net')

diff --git a/include/net/seg6.h b/include/net/seg6.h
index 5379f550f521..099bad59dc90 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -60,7 +60,8 @@ extern int seg6_local_init(void);
 extern void seg6_local_exit(void);
 
 extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
-extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
+extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+			     int proto);
 extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
 
 #endif
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 501233040570..5bec7817a7b9 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev,
 }
 
 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 {
 	struct net *net = dev_net(skb_dst(skb)->dev);
 	struct ipv6hdr *hdr, *inner_hdr;
@@ -116,15 +116,22 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 	 * hlim will be decremented in ip6_forward() afterwards and
 	 * decapsulation will overwrite inner hlim with outer hlim
 	 */
-	ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
-		     ip6_flowlabel(inner_hdr));
-	hdr->hop_limit = inner_hdr->hop_limit;
+
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+			     ip6_flowlabel(inner_hdr));
+		hdr->hop_limit = inner_hdr->hop_limit;
+	} else {
+		ip6_flow_hdr(hdr, 0, 0);
+		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+	}
+
 	hdr->nexthdr = NEXTHDR_ROUTING;
 
 	isrh = (void *)hdr + sizeof(*hdr);
 	memcpy(isrh, osrh, hdrlen);
 
-	isrh->nexthdr = NEXTHDR_IPV6;
+	isrh->nexthdr = proto;
 
 	hdr->daddr = isrh->segments[isrh->first_segment];
 	set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
@@ -199,7 +206,7 @@ static int seg6_do_srh(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct seg6_iptunnel_encap *tinfo;
-	int err = 0;
+	int proto, err = 0;
 
 	tinfo = seg6_encap_lwtunnel(dst->lwtstate);
 
@@ -210,17 +217,31 @@ static int seg6_do_srh(struct sk_buff *skb)
 
 	switch (tinfo->mode) {
 	case SEG6_IPTUN_MODE_INLINE:
+		if (skb->protocol != htons(ETH_P_IPV6))
+			return -EINVAL;
+
 		err = seg6_do_srh_inline(skb, tinfo->srh);
+		if (err)
+			return err;
+
 		skb_reset_inner_headers(skb);
 		break;
 	case SEG6_IPTUN_MODE_ENCAP:
-		err = seg6_do_srh_encap(skb, tinfo->srh);
+		if (skb->protocol == htons(ETH_P_IPV6))
+			proto = IPPROTO_IPV6;
+		else if (skb->protocol == htons(ETH_P_IP))
+			proto = IPPROTO_IPIP;
+		else
+			return -EINVAL;
+
+		err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+		if (err)
+			return err;
+
+		skb->protocol = htons(ETH_P_IPV6);
 		break;
 	}
 
-	if (err)
-		return err;
-
 	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 
@@ -334,6 +355,9 @@ static int seg6_build_state(struct nlattr *nla,
 	struct seg6_lwt *slwt;
 	int err;
 
+	if (family != AF_INET && family != AF_INET6)
+		return -EINVAL;
+
 	err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
 			       seg6_iptunnel_policy, extack);
 
@@ -356,6 +380,9 @@ static int seg6_build_state(struct nlattr *nla,
 
 	switch (tuninfo->mode) {
 	case SEG6_IPTUN_MODE_INLINE:
+		if (family != AF_INET6)
+			return -EINVAL;
+
 		break;
 	case SEG6_IPTUN_MODE_ENCAP:
 		break;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 147680e7a00c..609b94e970de 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -290,7 +290,7 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
 	skb_reset_inner_headers(skb);
 	skb->encapsulation = 1;
 
-	err = seg6_do_srh_encap(skb, slwt->srh);
+	err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
 	if (err)
 		goto drop;
 
-- 
cgit v1.2.3


From 143976ce992fcf3bfc0f4d15d5726bb492dcf262 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 24 Aug 2017 16:51:29 -0700
Subject: net_sched: remove tc class reference counting

For TC classes, their ->get() and ->put() are always paired, and the
reference counting is completely useless, because:

1) For class modification and dumping paths, we already hold RTNL lock,
   so all of these ->get(),->change(),->put() are atomic.

2) For filter bindiing/unbinding, we use other reference counter than
   this one, and they should have RTNL lock too.

3) For ->qlen_notify(), it is special because it is called on ->enqueue()
   path, but we already hold qdisc tree lock there, and we hold this
   tree lock when graft or delete the class too, so it should not be gone
   or changed until we release the tree lock.

Therefore, this patch removes ->get() and ->put(), but:

1) Adds a new ->find() to find the pointer to a class by classid, no
   refcnt.

2) Move the original class destroy upon the last refcnt into ->delete(),
   right after releasing tree lock. This is fine because the class is
   already removed from hash when holding the lock.

For those who also use ->put() as ->unbind(), just rename them to reflect
this change.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  3 +--
 net/sched/cls_api.c       | 17 ++++++-----------
 net/sched/sch_api.c       | 21 ++++++++-------------
 net/sched/sch_atm.c       | 30 ++++++++++++++++--------------
 net/sched/sch_cbq.c       | 41 ++++-------------------------------------
 net/sched/sch_drr.c       | 30 +++++-------------------------
 net/sched/sch_dsmark.c    | 17 ++++++++---------
 net/sched/sch_fq_codel.c  |  9 ++++-----
 net/sched/sch_hfsc.c      | 32 +++++---------------------------
 net/sched/sch_htb.c       | 33 +++++++--------------------------
 net/sched/sch_ingress.c   | 20 +++++++++-----------
 net/sched/sch_mq.c        |  9 ++-------
 net/sched/sch_mqprio.c    |  9 ++-------
 net/sched/sch_multiq.c    | 11 +++++------
 net/sched/sch_netem.c     |  9 ++-------
 net/sched/sch_prio.c      | 11 +++++------
 net/sched/sch_qfq.c       | 30 +++++-------------------------
 net/sched/sch_red.c       |  9 ++-------
 net/sched/sch_sfb.c       |  9 ++++-----
 net/sched/sch_sfq.c       |  9 ++++-----
 net/sched/sch_tbf.c       |  9 ++-------
 21 files changed, 106 insertions(+), 262 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1688f0f6c7ba..d817585aa5df 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -147,8 +147,7 @@ struct Qdisc_class_ops {
 	void			(*qlen_notify)(struct Qdisc *, unsigned long);
 
 	/* Class manipulation routines */
-	unsigned long		(*get)(struct Qdisc *, u32 classid);
-	void			(*put)(struct Qdisc *, unsigned long);
+	unsigned long		(*find)(struct Qdisc *, u32 classid);
 	int			(*change)(struct Qdisc *, u32, u32,
 					struct nlattr **, unsigned long *);
 	int			(*delete)(struct Qdisc *, unsigned long);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index eef6b077f30e..d470a4e2de58 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -586,7 +586,7 @@ replay:
 
 	/* Do we search for filter, attached to class? */
 	if (TC_H_MIN(parent)) {
-		cl = cops->get(q, parent);
+		cl = cops->find(q, parent);
 		if (cl == 0)
 			return -ENOENT;
 	}
@@ -716,8 +716,6 @@ replay:
 errout:
 	if (chain)
 		tcf_chain_put(chain);
-	if (cl)
-		cops->put(q, cl);
 	if (err == -EAGAIN)
 		/* Replay the request. */
 		goto replay;
@@ -822,17 +820,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 		goto out;
 	cops = q->ops->cl_ops;
 	if (!cops)
-		goto errout;
+		goto out;
 	if (!cops->tcf_block)
-		goto errout;
+		goto out;
 	if (TC_H_MIN(tcm->tcm_parent)) {
-		cl = cops->get(q, tcm->tcm_parent);
+		cl = cops->find(q, tcm->tcm_parent);
 		if (cl == 0)
-			goto errout;
+			goto out;
 	}
 	block = cops->tcf_block(q, cl);
 	if (!block)
-		goto errout;
+		goto out;
 
 	index_start = cb->args[0];
 	index = 0;
@@ -847,9 +845,6 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 
 	cb->args[0] = index;
 
-errout:
-	if (cl)
-		cops->put(q, cl);
 out:
 	return skb->len;
 }
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 3ef4eb578739..e7f8e4bfd4ec 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -153,7 +153,7 @@ int register_qdisc(struct Qdisc_ops *qops)
 	if (qops->cl_ops) {
 		const struct Qdisc_class_ops *cops = qops->cl_ops;
 
-		if (!(cops->get && cops->put && cops->walk && cops->leaf))
+		if (!(cops->find && cops->walk && cops->leaf))
 			goto out_einval;
 
 		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
@@ -320,12 +320,11 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 
 	if (cops == NULL)
 		return NULL;
-	cl = cops->get(p, classid);
+	cl = cops->find(p, classid);
 
 	if (cl == 0)
 		return NULL;
 	leaf = cops->leaf(p, cl);
-	cops->put(p, cl);
 	return leaf;
 }
 
@@ -756,9 +755,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
 		}
 		cops = sch->ops->cl_ops;
 		if (notify && cops->qlen_notify) {
-			cl = cops->get(sch, parentid);
+			cl = cops->find(sch, parentid);
 			cops->qlen_notify(sch, cl);
-			cops->put(sch, cl);
 		}
 		sch->q.qlen -= n;
 		sch->qstats.backlog -= len;
@@ -955,11 +953,11 @@ skip:
 
 		err = -EOPNOTSUPP;
 		if (cops && cops->graft) {
-			unsigned long cl = cops->get(parent, classid);
-			if (cl) {
+			unsigned long cl = cops->find(parent, classid);
+
+			if (cl)
 				err = cops->graft(parent, cl, new, &old);
-				cops->put(parent, cl);
-			} else
+			else
 				err = -ENOENT;
 		}
 		if (!err)
@@ -1739,7 +1737,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 		clid = TC_H_MAKE(qid, clid);
 
 	if (clid)
-		cl = cops->get(q, clid);
+		cl = cops->find(q, clid);
 
 	if (cl == 0) {
 		err = -ENOENT;
@@ -1773,9 +1771,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
 
 out:
-	if (cl)
-		cops->put(q, cl);
-
 	return err;
 }
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 2732950766a9..c5fcdf1a58a0 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -108,23 +108,29 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
 	return flow ? flow->q : NULL;
 }
 
-static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid)
+static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
 {
 	struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 
-	pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
+	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
 	flow = lookup_flow(sch, classid);
-	if (flow)
-		flow->ref++;
-	pr_debug("atm_tc_get: flow %p\n", flow);
+	pr_debug("%s: flow %p\n", __func__, flow);
 	return (unsigned long)flow;
 }
 
 static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
 					unsigned long parent, u32 classid)
 {
-	return atm_tc_get(sch, classid);
+	struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
+	struct atm_flow_data *flow;
+
+	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
+	flow = lookup_flow(sch, classid);
+	if (flow)
+		flow->ref++;
+	pr_debug("%s: flow %p\n", __func__, flow);
+	return (unsigned long)flow;
 }
 
 /*
@@ -234,7 +240,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		excess = NULL;
 	else {
 		excess = (struct atm_flow_data *)
-			atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
+			atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
 		if (!excess)
 			return -ENOENT;
 	}
@@ -262,10 +268,9 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 
 		for (i = 1; i < 0x8000; i++) {
 			classid = TC_H_MAKE(sch->handle, 0x8000 | i);
-			cl = atm_tc_get(sch, classid);
+			cl = atm_tc_find(sch, classid);
 			if (!cl)
 				break;
-			atm_tc_put(sch, cl);
 		}
 	}
 	pr_debug("atm_tc_change: new id %x\n", classid);
@@ -305,8 +310,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	*arg = (unsigned long)flow;
 	return 0;
 err_out:
-	if (excess)
-		atm_tc_put(sch, (unsigned long)excess);
 	sockfd_put(sock);
 	return error;
 }
@@ -377,7 +380,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	result = TC_ACT_OK;	/* be nice to gcc */
 	flow = NULL;
 	if (TC_H_MAJ(skb->priority) != sch->handle ||
-	    !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
+	    !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
 		struct tcf_proto *fl;
 
 		list_for_each_entry(flow, &p->flows, list) {
@@ -655,8 +658,7 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
 static const struct Qdisc_class_ops atm_class_ops = {
 	.graft		= atm_tc_graft,
 	.leaf		= atm_tc_leaf,
-	.get		= atm_tc_get,
-	.put		= atm_tc_put,
+	.find		= atm_tc_find,
 	.change		= atm_tc_change,
 	.delete		= atm_tc_delete,
 	.walk		= atm_tc_walk,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 1bdb0106f342..3ec8bec109bb 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -129,7 +129,6 @@ struct cbq_class {
 	struct tcf_proto __rcu	*filter_list;
 	struct tcf_block	*block;
 
-	int			refcnt;
 	int			filters;
 
 	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
@@ -1155,7 +1154,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (err < 0)
 		goto put_rtab;
 
-	q->link.refcnt = 1;
 	q->link.sibling = &q->link;
 	q->link.common.classid = sch->handle;
 	q->link.qdisc = sch;
@@ -1388,16 +1386,11 @@ static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
 	cbq_deactivate_class(cl);
 }
 
-static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
+static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = cbq_class_lookup(q, classid);
 
-	if (cl) {
-		cl->refcnt++;
-		return (unsigned long)cl;
-	}
-	return 0;
+	return (unsigned long)cbq_class_lookup(q, classid);
 }
 
 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
@@ -1443,25 +1436,6 @@ static void cbq_destroy(struct Qdisc *sch)
 	qdisc_class_hash_destroy(&q->clhash);
 }
 
-static void cbq_put(struct Qdisc *sch, unsigned long arg)
-{
-	struct cbq_class *cl = (struct cbq_class *)arg;
-
-	if (--cl->refcnt == 0) {
-#ifdef CONFIG_NET_CLS_ACT
-		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
-		struct cbq_sched_data *q = qdisc_priv(sch);
-
-		spin_lock_bh(root_lock);
-		if (q->rx_class == cl)
-			q->rx_class = NULL;
-		spin_unlock_bh(root_lock);
-#endif
-
-		cbq_destroy_class(sch, cl);
-	}
-}
-
 static int
 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
 		 unsigned long *arg)
@@ -1608,7 +1582,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	cl->R_tab = rtab;
 	rtab = NULL;
-	cl->refcnt = 1;
 	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
 	if (!cl->q)
 		cl->q = &noop_qdisc;
@@ -1689,12 +1662,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 	cbq_rmprio(q, cl);
 	sch_tree_unlock(sch);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
+	cbq_destroy_class(sch, cl);
 	return 0;
 }
 
@@ -1760,8 +1728,7 @@ static const struct Qdisc_class_ops cbq_class_ops = {
 	.graft		=	cbq_graft,
 	.leaf		=	cbq_leaf,
 	.qlen_notify	=	cbq_qlen_notify,
-	.get		=	cbq_get,
-	.put		=	cbq_put,
+	.find		=	cbq_find,
 	.change		=	cbq_change_class,
 	.delete		=	cbq_delete,
 	.walk		=	cbq_walk,
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 1d2f6235dfcf..2d0e8d4bdc29 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -20,7 +20,6 @@
 
 struct drr_class {
 	struct Qdisc_class_common	common;
-	unsigned int			refcnt;
 	unsigned int			filter_cnt;
 
 	struct gnet_stats_basic_packed		bstats;
@@ -111,7 +110,6 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl == NULL)
 		return -ENOBUFS;
 
-	cl->refcnt	   = 1;
 	cl->common.classid = classid;
 	cl->quantum	   = quantum;
 	cl->qdisc	   = qdisc_create_dflt(sch->dev_queue,
@@ -163,32 +161,15 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
 	drr_purge_queue(cl);
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
 	sch_tree_unlock(sch);
-	return 0;
-}
-
-static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
-{
-	struct drr_class *cl = drr_find_class(sch, classid);
 
-	if (cl != NULL)
-		cl->refcnt++;
-
-	return (unsigned long)cl;
+	drr_destroy_class(sch, cl);
+	return 0;
 }
 
-static void drr_put_class(struct Qdisc *sch, unsigned long arg)
+static unsigned long drr_search_class(struct Qdisc *sch, u32 classid)
 {
-	struct drr_class *cl = (struct drr_class *)arg;
-
-	if (--cl->refcnt == 0)
-		drr_destroy_class(sch, cl);
+	return (unsigned long)drr_find_class(sch, classid);
 }
 
 static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -478,8 +459,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch)
 static const struct Qdisc_class_ops drr_class_ops = {
 	.change		= drr_change_class,
 	.delete		= drr_delete_class,
-	.get		= drr_get_class,
-	.put		= drr_put_class,
+	.find		= drr_search_class,
 	.tcf_block	= drr_tcf_block,
 	.bind_tcf	= drr_bind_tcf,
 	.unbind_tcf	= drr_unbind_tcf,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 6d94fcc3592a..2836c80c7aa5 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -85,21 +85,21 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
 	return p->q;
 }
 
-static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
+static unsigned long dsmark_find(struct Qdisc *sch, u32 classid)
 {
-	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
-		 __func__, sch, qdisc_priv(sch), classid);
-
 	return TC_H_MIN(classid) + 1;
 }
 
 static unsigned long dsmark_bind_filter(struct Qdisc *sch,
 					unsigned long parent, u32 classid)
 {
-	return dsmark_get(sch, classid);
+	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
+		 __func__, sch, qdisc_priv(sch), classid);
+
+	return dsmark_find(sch, classid);
 }
 
-static void dsmark_put(struct Qdisc *sch, unsigned long cl)
+static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl)
 {
 }
 
@@ -469,14 +469,13 @@ nla_put_failure:
 static const struct Qdisc_class_ops dsmark_class_ops = {
 	.graft		=	dsmark_graft,
 	.leaf		=	dsmark_leaf,
-	.get		=	dsmark_get,
-	.put		=	dsmark_put,
+	.find		=	dsmark_find,
 	.change		=	dsmark_change,
 	.delete		=	dsmark_delete,
 	.walk		=	dsmark_walk,
 	.tcf_block	=	dsmark_tcf_block,
 	.bind_tcf	=	dsmark_bind_filter,
-	.unbind_tcf	=	dsmark_put,
+	.unbind_tcf	=	dsmark_unbind_filter,
 	.dump		=	dsmark_dump_class,
 };
 
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 337f2d6d81e4..7699b50688cd 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -579,7 +579,7 @@ static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg)
 	return NULL;
 }
 
-static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
+static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid)
 {
 	return 0;
 }
@@ -592,7 +592,7 @@ static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
 	return 0;
 }
 
-static void fq_codel_put(struct Qdisc *q, unsigned long cl)
+static void fq_codel_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
@@ -683,11 +683,10 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 
 static const struct Qdisc_class_ops fq_codel_class_ops = {
 	.leaf		=	fq_codel_leaf,
-	.get		=	fq_codel_get,
-	.put		=	fq_codel_put,
+	.find		=	fq_codel_find,
 	.tcf_block	=	fq_codel_tcf_block,
 	.bind_tcf	=	fq_codel_bind,
-	.unbind_tcf	=	fq_codel_put,
+	.unbind_tcf	=	fq_codel_unbind,
 	.dump		=	fq_codel_dump_class,
 	.dump_stats	=	fq_codel_dump_class_stats,
 	.walk		=	fq_codel_walk,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 15f09cb9f1ff..7c7820d0fdc7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -110,7 +110,6 @@ enum hfsc_class_flags {
 
 struct hfsc_class {
 	struct Qdisc_class_common cl_common;
-	unsigned int	refcnt;		/* usage count */
 
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
@@ -1045,7 +1044,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		hfsc_change_usc(cl, usc, 0);
 
 	cl->cl_common.classid = classid;
-	cl->refcnt    = 1;
 	cl->sched     = q;
 	cl->cl_parent = parent;
 	cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -1101,13 +1099,9 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
 	hfsc_purge_queue(sch, cl);
 	qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
 	sch_tree_unlock(sch);
+
+	hfsc_destroy_class(sch, cl);
 	return 0;
 }
 
@@ -1208,23 +1202,9 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
 }
 
 static unsigned long
-hfsc_get_class(struct Qdisc *sch, u32 classid)
-{
-	struct hfsc_class *cl = hfsc_find_class(classid, sch);
-
-	if (cl != NULL)
-		cl->refcnt++;
-
-	return (unsigned long)cl;
-}
-
-static void
-hfsc_put_class(struct Qdisc *sch, unsigned long arg)
+hfsc_search_class(struct Qdisc *sch, u32 classid)
 {
-	struct hfsc_class *cl = (struct hfsc_class *)arg;
-
-	if (--cl->refcnt == 0)
-		hfsc_destroy_class(sch, cl);
+	return (unsigned long)hfsc_find_class(classid, sch);
 }
 
 static unsigned long
@@ -1413,7 +1393,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 		goto err_tcf;
 
 	q->root.cl_common.classid = sch->handle;
-	q->root.refcnt  = 1;
 	q->root.sched   = q;
 	q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
 					  sch->handle);
@@ -1661,8 +1640,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = {
 	.graft		= hfsc_graft_class,
 	.leaf		= hfsc_class_leaf,
 	.qlen_notify	= hfsc_qlen_notify,
-	.get		= hfsc_get_class,
-	.put		= hfsc_put_class,
+	.find		= hfsc_search_class,
 	.bind_tcf	= hfsc_bind_tcf,
 	.unbind_tcf	= hfsc_unbind_tcf,
 	.tcf_block	= hfsc_tcf_block,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index dcf3c85e1f4f..f955b59d3c7c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -107,7 +107,6 @@ struct htb_class {
 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
 	struct tcf_block	*block;
 	int			filter_cnt;
-	int			refcnt;		/* usage count of this class */
 
 	int			level;		/* our level (see above) */
 	unsigned int		children;
@@ -193,6 +192,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 	return container_of(clc, struct htb_class, common);
 }
 
+static unsigned long htb_search(struct Qdisc *sch, u32 handle)
+{
+	return (unsigned long)htb_find(handle, sch);
+}
 /**
  * htb_classify - classify a packet into class
  *
@@ -1189,14 +1192,6 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
 	htb_deactivate(qdisc_priv(sch), cl);
 }
 
-static unsigned long htb_get(struct Qdisc *sch, u32 classid)
-{
-	struct htb_class *cl = htb_find(classid, sch);
-	if (cl)
-		cl->refcnt++;
-	return (unsigned long)cl;
-}
-
 static inline int htb_parent_last_child(struct htb_class *cl)
 {
 	if (!cl->parent)
@@ -1316,22 +1311,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 	if (last_child)
 		htb_parent_to_leaf(q, cl, new_q);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
 	sch_tree_unlock(sch);
-	return 0;
-}
 
-static void htb_put(struct Qdisc *sch, unsigned long arg)
-{
-	struct htb_class *cl = (struct htb_class *)arg;
-
-	if (--cl->refcnt == 0)
-		htb_destroy_class(sch, cl);
+	htb_destroy_class(sch, cl);
+	return 0;
 }
 
 static int htb_change_class(struct Qdisc *sch, u32 classid,
@@ -1422,7 +1405,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			}
 		}
 
-		cl->refcnt = 1;
 		cl->children = 0;
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 		RB_CLEAR_NODE(&cl->pq_node);
@@ -1598,8 +1580,7 @@ static const struct Qdisc_class_ops htb_class_ops = {
 	.graft		=	htb_graft,
 	.leaf		=	htb_leaf,
 	.qlen_notify	=	htb_qlen_notify,
-	.get		=	htb_get,
-	.put		=	htb_put,
+	.find		=	htb_search,
 	.change		=	htb_change_class,
 	.delete		=	htb_delete,
 	.walk		=	htb_walk,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index a15c543c3569..44de4ee51ce9 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -27,7 +27,7 @@ static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
 	return NULL;
 }
 
-static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
+static unsigned long ingress_find(struct Qdisc *sch, u32 classid)
 {
 	return TC_H_MIN(classid) + 1;
 }
@@ -35,10 +35,10 @@ static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
 static unsigned long ingress_bind_filter(struct Qdisc *sch,
 					 unsigned long parent, u32 classid)
 {
-	return ingress_get(sch, classid);
+	return ingress_find(sch, classid);
 }
 
-static void ingress_put(struct Qdisc *sch, unsigned long cl)
+static void ingress_unbind_filter(struct Qdisc *sch, unsigned long cl)
 {
 }
 
@@ -94,12 +94,11 @@ nla_put_failure:
 
 static const struct Qdisc_class_ops ingress_class_ops = {
 	.leaf		=	ingress_leaf,
-	.get		=	ingress_get,
-	.put		=	ingress_put,
+	.find		=	ingress_find,
 	.walk		=	ingress_walk,
 	.tcf_block	=	ingress_tcf_block,
 	.bind_tcf	=	ingress_bind_filter,
-	.unbind_tcf	=	ingress_put,
+	.unbind_tcf	=	ingress_unbind_filter,
 };
 
 static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
@@ -117,7 +116,7 @@ struct clsact_sched_data {
 	struct tcf_block *egress_block;
 };
 
-static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
+static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
 {
 	switch (TC_H_MIN(classid)) {
 	case TC_H_MIN(TC_H_MIN_INGRESS):
@@ -131,7 +130,7 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
 static unsigned long clsact_bind_filter(struct Qdisc *sch,
 					unsigned long parent, u32 classid)
 {
-	return clsact_get(sch, classid);
+	return clsact_find(sch, classid);
 }
 
 static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -183,12 +182,11 @@ static void clsact_destroy(struct Qdisc *sch)
 
 static const struct Qdisc_class_ops clsact_class_ops = {
 	.leaf		=	ingress_leaf,
-	.get		=	clsact_get,
-	.put		=	ingress_put,
+	.find		=	clsact_find,
 	.walk		=	ingress_walk,
 	.tcf_block	=	clsact_tcf_block,
 	.bind_tcf	=	clsact_bind_filter,
-	.unbind_tcf	=	ingress_put,
+	.unbind_tcf	=	ingress_unbind_filter,
 };
 
 static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index cadfdd4f1e52..f3a3e507422b 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -165,7 +165,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
 	return dev_queue->qdisc_sleeping;
 }
 
-static unsigned long mq_get(struct Qdisc *sch, u32 classid)
+static unsigned long mq_find(struct Qdisc *sch, u32 classid)
 {
 	unsigned int ntx = TC_H_MIN(classid);
 
@@ -174,10 +174,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid)
 	return ntx;
 }
 
-static void mq_put(struct Qdisc *sch, unsigned long cl)
-{
-}
-
 static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
 			 struct sk_buff *skb, struct tcmsg *tcm)
 {
@@ -223,8 +219,7 @@ static const struct Qdisc_class_ops mq_class_ops = {
 	.select_queue	= mq_select_queue,
 	.graft		= mq_graft,
 	.leaf		= mq_leaf,
-	.get		= mq_get,
-	.put		= mq_put,
+	.find		= mq_find,
 	.walk		= mq_walk,
 	.dump		= mq_dump_class,
 	.dump_stats	= mq_dump_class_stats,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 2165a05994b7..6bcdfe6e7b63 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -277,7 +277,7 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
 	return dev_queue->qdisc_sleeping;
 }
 
-static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
 {
 	struct net_device *dev = qdisc_dev(sch);
 	unsigned int ntx = TC_H_MIN(classid);
@@ -287,10 +287,6 @@ static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
 	return ntx;
 }
 
-static void mqprio_put(struct Qdisc *sch, unsigned long cl)
-{
-}
-
 static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
 			 struct sk_buff *skb, struct tcmsg *tcm)
 {
@@ -403,8 +399,7 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 static const struct Qdisc_class_ops mqprio_class_ops = {
 	.graft		= mqprio_graft,
 	.leaf		= mqprio_leaf,
-	.get		= mqprio_get,
-	.put		= mqprio_put,
+	.find		= mqprio_find,
 	.walk		= mqprio_walk,
 	.dump		= mqprio_dump_class,
 	.dump_stats	= mqprio_dump_class_stats,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index f143b7bbaa0d..a5df979b6248 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -306,7 +306,7 @@ multiq_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->queues[band];
 }
 
-static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+static unsigned long multiq_find(struct Qdisc *sch, u32 classid)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
 	unsigned long band = TC_H_MIN(classid);
@@ -319,11 +319,11 @@ static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
 static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
 				 u32 classid)
 {
-	return multiq_get(sch, classid);
+	return multiq_find(sch, classid);
 }
 
 
-static void multiq_put(struct Qdisc *q, unsigned long cl)
+static void multiq_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
@@ -385,12 +385,11 @@ static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
 static const struct Qdisc_class_ops multiq_class_ops = {
 	.graft		=	multiq_graft,
 	.leaf		=	multiq_leaf,
-	.get		=	multiq_get,
-	.put		=	multiq_put,
+	.find		=	multiq_find,
 	.walk		=	multiq_walk,
 	.tcf_block	=	multiq_tcf_block,
 	.bind_tcf	=	multiq_bind,
-	.unbind_tcf	=	multiq_put,
+	.unbind_tcf	=	multiq_unbind,
 	.dump		=	multiq_dump_class,
 	.dump_stats	=	multiq_dump_class_stats,
 };
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1b3dd6190e93..cf5aad0aabfc 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1096,15 +1096,11 @@ static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long netem_get(struct Qdisc *sch, u32 classid)
+static unsigned long netem_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void netem_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 	if (!walker->stop) {
@@ -1120,8 +1116,7 @@ static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 static const struct Qdisc_class_ops netem_class_ops = {
 	.graft		=	netem_graft,
 	.leaf		=	netem_leaf,
-	.get		=	netem_get,
-	.put		=	netem_put,
+	.find		=	netem_find,
 	.walk		=	netem_walk,
 	.dump		=	netem_dump_class,
 };
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index e3e364cc9a70..f31b28f788c0 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -260,7 +260,7 @@ prio_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->queues[band];
 }
 
-static unsigned long prio_get(struct Qdisc *sch, u32 classid)
+static unsigned long prio_find(struct Qdisc *sch, u32 classid)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	unsigned long band = TC_H_MIN(classid);
@@ -272,11 +272,11 @@ static unsigned long prio_get(struct Qdisc *sch, u32 classid)
 
 static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
 {
-	return prio_get(sch, classid);
+	return prio_find(sch, classid);
 }
 
 
-static void prio_put(struct Qdisc *q, unsigned long cl)
+static void prio_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
@@ -338,12 +338,11 @@ static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl)
 static const struct Qdisc_class_ops prio_class_ops = {
 	.graft		=	prio_graft,
 	.leaf		=	prio_leaf,
-	.get		=	prio_get,
-	.put		=	prio_put,
+	.find		=	prio_find,
 	.walk		=	prio_walk,
 	.tcf_block	=	prio_tcf_block,
 	.bind_tcf	=	prio_bind,
-	.unbind_tcf	=	prio_put,
+	.unbind_tcf	=	prio_unbind,
 	.dump		=	prio_dump_class,
 	.dump_stats	=	prio_dump_class_stats,
 };
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 9caa959f91e1..cd661a7f81e6 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -132,7 +132,6 @@ struct qfq_aggregate;
 struct qfq_class {
 	struct Qdisc_class_common common;
 
-	unsigned int refcnt;
 	unsigned int filter_cnt;
 
 	struct gnet_stats_basic_packed bstats;
@@ -477,7 +476,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl == NULL)
 		return -ENOBUFS;
 
-	cl->refcnt = 1;
 	cl->common.classid = classid;
 	cl->deficit = lmax;
 
@@ -555,32 +553,15 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
 	qfq_purge_queue(cl);
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
 	sch_tree_unlock(sch);
-	return 0;
-}
-
-static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
-{
-	struct qfq_class *cl = qfq_find_class(sch, classid);
-
-	if (cl != NULL)
-		cl->refcnt++;
 
-	return (unsigned long)cl;
+	qfq_destroy_class(sch, cl);
+	return 0;
 }
 
-static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
+static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
 {
-	struct qfq_class *cl = (struct qfq_class *)arg;
-
-	if (--cl->refcnt == 0)
-		qfq_destroy_class(sch, cl);
+	return (unsigned long)qfq_find_class(sch, classid);
 }
 
 static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -1510,8 +1491,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
 static const struct Qdisc_class_ops qfq_class_ops = {
 	.change		= qfq_change_class,
 	.delete		= qfq_delete_class,
-	.get		= qfq_get_class,
-	.put		= qfq_put_class,
+	.find		= qfq_search_class,
 	.tcf_block	= qfq_tcf_block,
 	.bind_tcf	= qfq_bind_tcf,
 	.unbind_tcf	= qfq_unbind_tcf,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 11292adce412..93b9d70a9b28 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -311,15 +311,11 @@ static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long red_get(struct Qdisc *sch, u32 classid)
+static unsigned long red_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void red_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 	if (!walker->stop) {
@@ -335,8 +331,7 @@ static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 static const struct Qdisc_class_ops red_class_ops = {
 	.graft		=	red_graft,
 	.leaf		=	red_leaf,
-	.get		=	red_get,
-	.put		=	red_put,
+	.find		=	red_find,
 	.walk		=	red_walk,
 	.dump		=	red_dump_class,
 };
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 11fb6ec878d6..cc39e170b4aa 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -632,12 +632,12 @@ static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfb_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void sfb_put(struct Qdisc *sch, unsigned long arg)
+static void sfb_unbind(struct Qdisc *sch, unsigned long arg)
 {
 }
 
@@ -683,14 +683,13 @@ static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
 static const struct Qdisc_class_ops sfb_class_ops = {
 	.graft		=	sfb_graft,
 	.leaf		=	sfb_leaf,
-	.get		=	sfb_get,
-	.put		=	sfb_put,
+	.find		=	sfb_find,
 	.change		=	sfb_change_class,
 	.delete		=	sfb_delete,
 	.walk		=	sfb_walk,
 	.tcf_block	=	sfb_tcf_block,
 	.bind_tcf	=	sfb_bind,
-	.unbind_tcf	=	sfb_put,
+	.unbind_tcf	=	sfb_unbind,
 	.dump		=	sfb_dump_class,
 };
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 82469ef9655e..b46e0121dd1a 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -808,7 +808,7 @@ static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
 	return NULL;
 }
 
-static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
 {
 	return 0;
 }
@@ -821,7 +821,7 @@ static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
 	return 0;
 }
 
-static void sfq_put(struct Qdisc *q, unsigned long cl)
+static void sfq_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
@@ -885,11 +885,10 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 
 static const struct Qdisc_class_ops sfq_class_ops = {
 	.leaf		=	sfq_leaf,
-	.get		=	sfq_get,
-	.put		=	sfq_put,
+	.find		=	sfq_find,
 	.tcf_block	=	sfq_tcf_block,
 	.bind_tcf	=	sfq_bind,
-	.unbind_tcf	=	sfq_put,
+	.unbind_tcf	=	sfq_unbind,
 	.dump		=	sfq_dump_class,
 	.dump_stats	=	sfq_dump_class_stats,
 	.walk		=	sfq_walk,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b2e4b6ad241a..d5dba972ab06 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -510,15 +510,11 @@ static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
+static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void tbf_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
 static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 	if (!walker->stop) {
@@ -534,8 +530,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 static const struct Qdisc_class_ops tbf_class_ops = {
 	.graft		=	tbf_graft,
 	.leaf		=	tbf_leaf,
-	.get		=	tbf_get,
-	.put		=	tbf_put,
+	.find		=	tbf_find,
 	.walk		=	tbf_walk,
 	.dump		=	tbf_dump_class,
 };
-- 
cgit v1.2.3


From 3cd904ecbb5d0bcf36dfca7e726bdfd6d3644334 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 24 Aug 2017 16:51:30 -0700
Subject: net_sched: kill u32_node pointer in Qdisc

It is ugly to hide a u32-filter-specific pointer inside Qdisc,
this breaks the TC layers:

1. Qdisc is a generic representation, should not have any specific
   data of any type

2. Qdisc layer is above filter layer, should only save filters in
   the list of struct tcf_proto.

This pointer is used as the head of the chain of u32 hash tables,
that is struct tc_u_hnode, because u32 filter is very special,
it allows to create multiple hash tables within one qdisc and
across multiple u32 filters.

Instead of using this ugly pointer, we can just save it in a global
hash table key'ed by (dev ifindex, qdisc handle), therefore we can
still treat it as a per qdisc basis data structure conceptually.

Of course, because of network namespaces, this key is not unique
at all, but it is fine as we already have a pointer to Qdisc in
struct tc_u_common, we can just compare the pointers when collision.

And this only affects slow paths, has no impact to fast path,
thanks to the pointer ->tp_c.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  1 -
 net/sched/cls_u32.c       | 57 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 53 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d817585aa5df..c30b634c5f82 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -75,7 +75,6 @@ struct Qdisc {
 	struct hlist_node       hash;
 	u32			handle;
 	u32			parent;
-	void			*u32_node;
 
 	struct netdev_queue	*dev_queue;
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index af22742d2847..99ea4c74dd5b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -40,6 +40,8 @@
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <linux/bitmap.h>
+#include <linux/netdevice.h>
+#include <linux/hash.h>
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
@@ -92,6 +94,7 @@ struct tc_u_common {
 	struct Qdisc		*q;
 	int			refcnt;
 	u32			hgenerator;
+	struct hlist_node	hnode;
 	struct rcu_head		rcu;
 };
 
@@ -323,12 +326,40 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
 	return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
 }
 
+static struct hlist_head *tc_u_common_hash;
+
+#define U32_HASH_SHIFT 10
+#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
+
+static unsigned int tc_u_hash(const struct tcf_proto *tp)
+{
+	struct net_device *dev = tp->q->dev_queue->dev;
+	u32 qhandle = tp->q->handle;
+	int ifindex = dev->ifindex;
+
+	return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT);
+}
+
+static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
+{
+	struct tc_u_common *tc;
+	unsigned int h;
+
+	h = tc_u_hash(tp);
+	hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
+		if (tc->q == tp->q)
+			return tc;
+	}
+	return NULL;
+}
+
 static int u32_init(struct tcf_proto *tp)
 {
 	struct tc_u_hnode *root_ht;
 	struct tc_u_common *tp_c;
+	unsigned int h;
 
-	tp_c = tp->q->u32_node;
+	tp_c = tc_u_common_find(tp);
 
 	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
 	if (root_ht == NULL)
@@ -345,7 +376,10 @@ static int u32_init(struct tcf_proto *tp)
 			return -ENOBUFS;
 		}
 		tp_c->q = tp->q;
-		tp->q->u32_node = tp_c;
+		INIT_HLIST_NODE(&tp_c->hnode);
+
+		h = tc_u_hash(tp);
+		hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
 	}
 
 	tp_c->refcnt++;
@@ -585,7 +619,7 @@ static void u32_destroy(struct tcf_proto *tp)
 	if (--tp_c->refcnt == 0) {
 		struct tc_u_hnode *ht;
 
-		tp->q->u32_node = NULL;
+		hlist_del(&tp_c->hnode);
 
 		for (ht = rtnl_dereference(tp_c->hlist);
 		     ht;
@@ -1213,6 +1247,8 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
 
 static int __init init_u32(void)
 {
+	int i, ret;
+
 	pr_info("u32 classifier\n");
 #ifdef CONFIG_CLS_U32_PERF
 	pr_info("    Performance counters on\n");
@@ -1223,12 +1259,25 @@ static int __init init_u32(void)
 #ifdef CONFIG_NET_CLS_ACT
 	pr_info("    Actions configured\n");
 #endif
-	return register_tcf_proto_ops(&cls_u32_ops);
+	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
+					  sizeof(struct hlist_head),
+					  GFP_KERNEL);
+	if (!tc_u_common_hash)
+		return -ENOMEM;
+
+	for (i = 0; i < U32_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&tc_u_common_hash[i]);
+
+	ret = register_tcf_proto_ops(&cls_u32_ops);
+	if (ret)
+		kvfree(tc_u_common_hash);
+	return ret;
 }
 
 static void __exit exit_u32(void)
 {
 	unregister_tcf_proto_ops(&cls_u32_ops);
+	kvfree(tc_u_common_hash);
 }
 
 module_init(init_u32)
-- 
cgit v1.2.3


From 960632ece6949be1ab6f7a911faa4fa6e8305f4a Mon Sep 17 00:00:00 2001
From: Aaron Conole <aconole@bytheb.org>
Date: Thu, 24 Aug 2017 00:08:32 +0200
Subject: netfilter: convert hook list to an array

This converts the storage and layout of netfilter hook entries from a
linked list to an array.  After this commit, hook entries will be
stored adjacent in memory.  The next pointer is no longer required.

The ops pointers are stored at the end of the array as they are only
used in the register/unregister path and in the legacy br_netfilter code.

nf_unregister_net_hooks() is slower than needed as it just calls
nf_unregister_net_hook in a loop (i.e. at least n synchronize_net()
calls), this will be addressed in followup patch.

Test setup:
 - ixgbe 10gbit
 - netperf UDP_STREAM, 64 byte packets
 - 5 hooks: (raw + mangle prerouting, mangle+filter input, inet filter):
empty mangle and raw prerouting, mangle and filter input hooks:
353.9
this patch:
364.2

Signed-off-by: Aaron Conole <aconole@bytheb.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netdevice.h         |   2 +-
 include/linux/netfilter.h         |  45 +++---
 include/linux/netfilter_ingress.h |   4 +-
 include/net/netfilter/nf_queue.h  |   2 +-
 include/net/netns/netfilter.h     |   2 +-
 net/bridge/br_netfilter_hooks.c   |  19 ++-
 net/netfilter/core.c              | 297 ++++++++++++++++++++++++++++----------
 net/netfilter/nf_internals.h      |   3 +-
 net/netfilter/nf_queue.c          |  67 +++++----
 9 files changed, 307 insertions(+), 134 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 614642eb7eb7..ca0a30127300 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1811,7 +1811,7 @@ struct net_device {
 #endif
 	struct netdev_queue __rcu *ingress_queue;
 #ifdef CONFIG_NETFILTER_INGRESS
-	struct nf_hook_entry __rcu *nf_hooks_ingress;
+	struct nf_hook_entries __rcu *nf_hooks_ingress;
 #endif
 
 	unsigned char		broadcast[MAX_ADDR_LEN];
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 22f081065d49..f84bca1703cd 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -72,25 +72,32 @@ struct nf_hook_ops {
 };
 
 struct nf_hook_entry {
-	struct nf_hook_entry __rcu	*next;
 	nf_hookfn			*hook;
 	void				*priv;
-	const struct nf_hook_ops	*orig_ops;
 };
 
-static inline void
-nf_hook_entry_init(struct nf_hook_entry *entry,	const struct nf_hook_ops *ops)
-{
-	entry->next = NULL;
-	entry->hook = ops->hook;
-	entry->priv = ops->priv;
-	entry->orig_ops = ops;
-}
+struct nf_hook_entries {
+	u16				num_hook_entries;
+	/* padding */
+	struct nf_hook_entry		hooks[];
+
+	/* trailer: pointers to original orig_ops of each hook.
+	 *
+	 * This is not part of struct nf_hook_entry since its only
+	 * needed in slow path (hook register/unregister).
+	 *
+	 * const struct nf_hook_ops     *orig_ops[]
+	 */
+};
 
-static inline int
-nf_hook_entry_priority(const struct nf_hook_entry *entry)
+static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e)
 {
-	return entry->orig_ops->priority;
+	unsigned int n = e->num_hook_entries;
+	const void *hook_end;
+
+	hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */
+
+	return (struct nf_hook_ops **)hook_end;
 }
 
 static inline int
@@ -100,12 +107,6 @@ nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb,
 	return entry->hook(entry->priv, skb, state);
 }
 
-static inline const struct nf_hook_ops *
-nf_hook_entry_ops(const struct nf_hook_entry *entry)
-{
-	return entry->orig_ops;
-}
-
 static inline void nf_hook_state_init(struct nf_hook_state *p,
 				      unsigned int hook,
 				      u_int8_t pf,
@@ -168,7 +169,7 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 #endif
 
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
-		 struct nf_hook_entry *entry);
+		 const struct nf_hook_entries *e, unsigned int i);
 
 /**
  *	nf_hook - call a netfilter hook
@@ -182,7 +183,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
-	struct nf_hook_entry *hook_head;
+	struct nf_hook_entries *hook_head;
 	int ret = 1;
 
 #ifdef HAVE_JUMP_LABEL
@@ -200,7 +201,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 		nf_hook_state_init(&state, hook, pf, indev, outdev,
 				   sk, net, okfn);
 
-		ret = nf_hook_slow(skb, &state, hook_head);
+		ret = nf_hook_slow(skb, &state, hook_head, 0);
 	}
 	rcu_read_unlock();
 
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 59476061de86..8d5dae1e2ff8 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -17,7 +17,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
 /* caller must hold rcu_read_lock */
 static inline int nf_hook_ingress(struct sk_buff *skb)
 {
-	struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress);
+	struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
 	struct nf_hook_state state;
 	int ret;
 
@@ -30,7 +30,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 	nf_hook_state_init(&state, NF_NETDEV_INGRESS,
 			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
-	ret = nf_hook_slow(skb, &state, e);
+	ret = nf_hook_slow(skb, &state, e, 0);
 	if (ret == 0)
 		return -1;
 
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 4454719ff849..39468720fc19 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -10,9 +10,9 @@ struct nf_queue_entry {
 	struct list_head	list;
 	struct sk_buff		*skb;
 	unsigned int		id;
+	unsigned int		hook_index;	/* index in hook_entries->hook[] */
 
 	struct nf_hook_state	state;
-	struct nf_hook_entry	*hook;
 	u16			size; /* sizeof(entry) + saved route keys */
 
 	/* extra space to store route keys */
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index cea396b53a60..72d66c8763d0 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -16,7 +16,7 @@ struct netns_nf {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *nf_log_dir_header;
 #endif
-	struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+	struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
 	bool			defrag_ipv4;
 #endif
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 626f4b2cef16..c2eea1b8737a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -985,22 +985,25 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 		      int (*okfn)(struct net *, struct sock *,
 				  struct sk_buff *))
 {
-	struct nf_hook_entry *elem;
+	const struct nf_hook_entries *e;
 	struct nf_hook_state state;
+	struct nf_hook_ops **ops;
+	unsigned int i;
 	int ret;
 
-	for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
-	     elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF;
-	     elem = rcu_dereference(elem->next))
-		;
-
-	if (!elem)
+	e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+	if (!e)
 		return okfn(net, sk, skb);
 
+	ops = nf_hook_entries_get_hook_ops(e);
+	for (i = 0; i < e->num_hook_entries &&
+	      ops[i]->priority <= NF_BR_PRI_BRNF; i++)
+		;
+
 	nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
 			   sk, net, okfn);
 
-	ret = nf_hook_slow(skb, &state, elem);
+	ret = nf_hook_slow(skb, &state, e, i);
 	if (ret == 1)
 		ret = okfn(net, sk, skb);
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 974cf2a3795a..1a9e23c9ab98 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -21,7 +21,7 @@
 #include <linux/inetdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
-#include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/rcupdate.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -62,10 +62,160 @@ EXPORT_SYMBOL(nf_hooks_needed);
 #endif
 
 static DEFINE_MUTEX(nf_hook_mutex);
+
+/* max hooks per family/hooknum */
+#define MAX_HOOK_COUNT		1024
+
 #define nf_entry_dereference(e) \
 	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
-static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
+{
+	struct nf_hook_entries *e;
+	size_t alloc = sizeof(*e) +
+		       sizeof(struct nf_hook_entry) * num +
+		       sizeof(struct nf_hook_ops *) * num;
+
+	if (num == 0)
+		return NULL;
+
+	e = kvzalloc(alloc, GFP_KERNEL);
+	if (e)
+		e->num_hook_entries = num;
+	return e;
+}
+
+static unsigned int accept_all(void *priv,
+			       struct sk_buff *skb,
+			       const struct nf_hook_state *state)
+{
+	return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
+}
+
+static const struct nf_hook_ops dummy_ops = {
+	.hook = accept_all,
+	.priority = INT_MIN,
+};
+
+static struct nf_hook_entries *
+nf_hook_entries_grow(const struct nf_hook_entries *old,
+		     const struct nf_hook_ops *reg)
+{
+	unsigned int i, alloc_entries, nhooks, old_entries;
+	struct nf_hook_ops **orig_ops = NULL;
+	struct nf_hook_ops **new_ops;
+	struct nf_hook_entries *new;
+	bool inserted = false;
+
+	alloc_entries = 1;
+	old_entries = old ? old->num_hook_entries : 0;
+
+	if (old) {
+		orig_ops = nf_hook_entries_get_hook_ops(old);
+
+		for (i = 0; i < old_entries; i++) {
+			if (orig_ops[i] != &dummy_ops)
+				alloc_entries++;
+		}
+	}
+
+	if (alloc_entries > MAX_HOOK_COUNT)
+		return ERR_PTR(-E2BIG);
+
+	new = allocate_hook_entries_size(alloc_entries);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+
+	new_ops = nf_hook_entries_get_hook_ops(new);
+
+	i = 0;
+	nhooks = 0;
+	while (i < old_entries) {
+		if (orig_ops[i] == &dummy_ops) {
+			++i;
+			continue;
+		}
+		if (inserted || reg->priority > orig_ops[i]->priority) {
+			new_ops[nhooks] = (void *)orig_ops[i];
+			new->hooks[nhooks] = old->hooks[i];
+			i++;
+		} else {
+			new_ops[nhooks] = (void *)reg;
+			new->hooks[nhooks].hook = reg->hook;
+			new->hooks[nhooks].priv = reg->priv;
+			inserted = true;
+		}
+		nhooks++;
+	}
+
+	if (!inserted) {
+		new_ops[nhooks] = (void *)reg;
+		new->hooks[nhooks].hook = reg->hook;
+		new->hooks[nhooks].priv = reg->priv;
+	}
+
+	return new;
+}
+
+/*
+ * __nf_hook_entries_try_shrink - try to shrink hook array
+ *
+ * @pp -- location of hook blob
+ *
+ * Hook unregistration must always succeed, so to-be-removed hooks
+ * are replaced by a dummy one that will just move to next hook.
+ *
+ * This counts the current dummy hooks, attempts to allocate new blob,
+ * copies the live hooks, then replaces and discards old one.
+ *
+ * return values:
+ *
+ * Returns address to free, or NULL.
+ */
+static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
+{
+	struct nf_hook_entries *old, *new = NULL;
+	unsigned int i, j, skip = 0, hook_entries;
+	struct nf_hook_ops **orig_ops;
+	struct nf_hook_ops **new_ops;
+
+	old = nf_entry_dereference(*pp);
+	if (WARN_ON_ONCE(!old))
+		return NULL;
+
+	orig_ops = nf_hook_entries_get_hook_ops(old);
+	for (i = 0; i < old->num_hook_entries; i++) {
+		if (orig_ops[i] == &dummy_ops)
+			skip++;
+	}
+
+	/* if skip == hook_entries all hooks have been removed */
+	hook_entries = old->num_hook_entries;
+	if (skip == hook_entries)
+		goto out_assign;
+
+	if (WARN_ON(skip == 0))
+		return NULL;
+
+	hook_entries -= skip;
+	new = allocate_hook_entries_size(hook_entries);
+	if (!new)
+		return NULL;
+
+	new_ops = nf_hook_entries_get_hook_ops(new);
+	for (i = 0, j = 0; i < old->num_hook_entries; i++) {
+		if (orig_ops[i] == &dummy_ops)
+			continue;
+		new->hooks[j] = old->hooks[i];
+		new_ops[j] = (void *)orig_ops[i];
+		j++;
+	}
+out_assign:
+	rcu_assign_pointer(*pp, new);
+	return old;
+}
+
+static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
 {
 	if (reg->pf != NFPROTO_NETDEV)
 		return net->nf.hooks[reg->pf]+reg->hooknum;
@@ -76,13 +226,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st
 			return &reg->dev->nf_hooks_ingress;
 	}
 #endif
+	WARN_ON_ONCE(1);
 	return NULL;
 }
 
 int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-	struct nf_hook_entry __rcu **pp;
-	struct nf_hook_entry *entry, *p;
+	struct nf_hook_entries *p, *new_hooks;
+	struct nf_hook_entries __rcu **pp;
 
 	if (reg->pf == NFPROTO_NETDEV) {
 #ifndef CONFIG_NETFILTER_INGRESS
@@ -98,23 +249,18 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 	if (!pp)
 		return -EINVAL;
 
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
-
-	nf_hook_entry_init(entry, reg);
-
 	mutex_lock(&nf_hook_mutex);
 
-	/* Find the spot in the list */
-	for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
-		if (reg->priority < nf_hook_entry_priority(p))
-			break;
-	}
-	rcu_assign_pointer(entry->next, p);
-	rcu_assign_pointer(*pp, entry);
+	p = nf_entry_dereference(*pp);
+	new_hooks = nf_hook_entries_grow(p, reg);
+
+	if (!IS_ERR(new_hooks))
+		rcu_assign_pointer(*pp, new_hooks);
 
 	mutex_unlock(&nf_hook_mutex);
+	if (IS_ERR(new_hooks))
+		return PTR_ERR(new_hooks);
+
 #ifdef CONFIG_NETFILTER_INGRESS
 	if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
 		net_inc_ingress_queue();
@@ -122,48 +268,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 #ifdef HAVE_JUMP_LABEL
 	static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
+	synchronize_net();
+	BUG_ON(p == new_hooks);
+	kvfree(p);
 	return 0;
 }
 EXPORT_SYMBOL(nf_register_net_hook);
 
-static struct nf_hook_entry *
-__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+/*
+ * __nf_unregister_net_hook - remove a hook from blob
+ *
+ * @oldp: current address of hook blob
+ * @unreg: hook to unregister
+ *
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+static void __nf_unregister_net_hook(struct nf_hook_entries *old,
+				     const struct nf_hook_ops *unreg)
 {
-	struct nf_hook_entry __rcu **pp;
-	struct nf_hook_entry *p;
-
-	pp = nf_hook_entry_head(net, reg);
-	if (WARN_ON_ONCE(!pp))
-		return NULL;
+	struct nf_hook_ops **orig_ops;
+	bool found = false;
+	unsigned int i;
 
-	mutex_lock(&nf_hook_mutex);
-	for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
-		if (nf_hook_entry_ops(p) == reg) {
-			rcu_assign_pointer(*pp, p->next);
-			break;
-		}
-	}
-	mutex_unlock(&nf_hook_mutex);
-	if (!p) {
-		WARN(1, "nf_unregister_net_hook: hook not found!\n");
-		return NULL;
+	orig_ops = nf_hook_entries_get_hook_ops(old);
+	for (i = 0; i < old->num_hook_entries; i++) {
+		if (orig_ops[i] != unreg)
+			continue;
+		WRITE_ONCE(old->hooks[i].hook, accept_all);
+		WRITE_ONCE(orig_ops[i], &dummy_ops);
+		found = true;
+		break;
 	}
+
+	if (found) {
 #ifdef CONFIG_NETFILTER_INGRESS
-	if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
-		net_dec_ingress_queue();
+		if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+			net_dec_ingress_queue();
 #endif
 #ifdef HAVE_JUMP_LABEL
-	static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+		static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
 #endif
-
-	return p;
+	} else {
+		WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+	}
 }
 
 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-	struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+	struct nf_hook_entries __rcu **pp;
+	struct nf_hook_entries *p;
 	unsigned int nfq;
 
+	pp = nf_hook_entry_head(net, reg);
+	if (!pp)
+		return;
+
+	mutex_lock(&nf_hook_mutex);
+
+	p = nf_entry_dereference(*pp);
+	if (WARN_ON_ONCE(!p)) {
+		mutex_unlock(&nf_hook_mutex);
+		return;
+	}
+
+	__nf_unregister_net_hook(p, reg);
+
+	p = __nf_hook_entries_try_shrink(pp);
+	mutex_unlock(&nf_hook_mutex);
 	if (!p)
 		return;
 
@@ -173,7 +345,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 	nfq = nf_queue_nf_hook_drop(net);
 	if (nfq)
 		synchronize_net();
-	kfree(p);
+	kvfree(p);
 }
 EXPORT_SYMBOL(nf_unregister_net_hook);
 
@@ -200,46 +372,25 @@ EXPORT_SYMBOL(nf_register_net_hooks);
 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 			     unsigned int hookcount)
 {
-	struct nf_hook_entry *to_free[16];
-	unsigned int i, n, nfq;
-
-	do {
-		n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
-
-		for (i = 0; i < n; i++)
-			to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
-
-		synchronize_net();
-
-		/* need 2nd synchronize_net() if nfqueue is used, skb
-		 * can get reinjected right before nf_queue_hook_drop()
-		 */
-		nfq = nf_queue_nf_hook_drop(net);
-		if (nfq)
-			synchronize_net();
-
-		for (i = 0; i < n; i++)
-			kfree(to_free[i]);
+	unsigned int i;
 
-		reg += n;
-		hookcount -= n;
-	} while (hookcount > 0);
+	for (i = 0; i < hookcount; i++)
+		nf_unregister_net_hook(net, &reg[i]);
 }
 EXPORT_SYMBOL(nf_unregister_net_hooks);
 
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
-		 struct nf_hook_entry *entry)
+		 const struct nf_hook_entries *e, unsigned int s)
 {
 	unsigned int verdict;
 	int ret;
 
-	do {
-		verdict = nf_hook_entry_hookfn(entry, skb, state);
+	for (; s < e->num_hook_entries; s++) {
+		verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
 		switch (verdict & NF_VERDICT_MASK) {
 		case NF_ACCEPT:
-			entry = rcu_dereference(entry->next);
 			break;
 		case NF_DROP:
 			kfree_skb(skb);
@@ -248,8 +399,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 				ret = -EPERM;
 			return ret;
 		case NF_QUEUE:
-			ret = nf_queue(skb, state, &entry, verdict);
-			if (ret == 1 && entry)
+			ret = nf_queue(skb, state, e, s, verdict);
+			if (ret == 1)
 				continue;
 			return ret;
 		default:
@@ -258,7 +409,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 			 */
 			return 0;
 		}
-	} while (entry);
+	}
 
 	return 1;
 }
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 19f00a47a710..bacd6363946e 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -13,7 +13,8 @@
 
 /* nf_queue.c */
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
-	     struct nf_hook_entry **entryp, unsigned int verdict);
+	     const struct nf_hook_entries *entries, unsigned int index,
+	     unsigned int verdict);
 unsigned int nf_queue_nf_hook_drop(struct net *net);
 
 /* nf_log.c */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4f4d80a58fb5..f7e21953b1de 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -112,7 +112,8 @@ unsigned int nf_queue_nf_hook_drop(struct net *net)
 EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
 
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
-		      struct nf_hook_entry *hook_entry, unsigned int queuenum)
+		      const struct nf_hook_entries *entries,
+		      unsigned int index, unsigned int queuenum)
 {
 	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
@@ -140,7 +141,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
 		.state	= *state,
-		.hook	= hook_entry,
+		.hook_index = index,
 		.size	= sizeof(*entry) + afinfo->route_key_size,
 	};
 
@@ -163,18 +164,16 @@ err:
 
 /* Packets leaving via this function must come back through nf_reinject(). */
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
-	     struct nf_hook_entry **entryp, unsigned int verdict)
+	     const struct nf_hook_entries *entries, unsigned int index,
+	     unsigned int verdict)
 {
-	struct nf_hook_entry *entry = *entryp;
 	int ret;
 
-	ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
+	ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS);
 	if (ret < 0) {
 		if (ret == -ESRCH &&
-		    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
-			*entryp = rcu_dereference(entry->next);
+		    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
 			return 1;
-		}
 		kfree_skb(skb);
 	}
 
@@ -183,33 +182,56 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 
 static unsigned int nf_iterate(struct sk_buff *skb,
 			       struct nf_hook_state *state,
-			       struct nf_hook_entry **entryp)
+			       const struct nf_hook_entries *hooks,
+			       unsigned int *index)
 {
-	unsigned int verdict;
+	const struct nf_hook_entry *hook;
+	unsigned int verdict, i = *index;
 
-	do {
+	while (i < hooks->num_hook_entries) {
+		hook = &hooks->hooks[i];
 repeat:
-		verdict = nf_hook_entry_hookfn((*entryp), skb, state);
+		verdict = nf_hook_entry_hookfn(hook, skb, state);
 		if (verdict != NF_ACCEPT) {
 			if (verdict != NF_REPEAT)
 				return verdict;
 			goto repeat;
 		}
-		*entryp = rcu_dereference((*entryp)->next);
-	} while (*entryp);
+		i++;
+	}
 
+	*index = i;
 	return NF_ACCEPT;
 }
 
+/* Caller must hold rcu read-side lock */
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
-	struct nf_hook_entry *hook_entry = entry->hook;
+	const struct nf_hook_entry *hook_entry;
+	const struct nf_hook_entries *hooks;
 	struct sk_buff *skb = entry->skb;
 	const struct nf_afinfo *afinfo;
+	const struct net *net;
+	unsigned int i;
 	int err;
+	u8 pf;
+
+	net = entry->state.net;
+	pf = entry->state.pf;
+
+	hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
 
 	nf_queue_entry_release_refs(entry);
 
+	i = entry->hook_index;
+	if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+		kfree_skb(skb);
+		kfree(entry);
+		return;
+	}
+
+	hook_entry = &hooks->hooks[i];
+
 	/* Continue traversal iff userspace said ok... */
 	if (verdict == NF_REPEAT)
 		verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
@@ -221,27 +243,22 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	}
 
 	if (verdict == NF_ACCEPT) {
-		hook_entry = rcu_dereference(hook_entry->next);
-		if (hook_entry)
 next_hook:
-			verdict = nf_iterate(skb, &entry->state, &hook_entry);
+		++i;
+		verdict = nf_iterate(skb, &entry->state, hooks, &i);
 	}
 
 	switch (verdict & NF_VERDICT_MASK) {
 	case NF_ACCEPT:
 	case NF_STOP:
-okfn:
 		local_bh_disable();
 		entry->state.okfn(entry->state.net, entry->state.sk, skb);
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
-		err = nf_queue(skb, &entry->state, &hook_entry, verdict);
-		if (err == 1) {
-			if (hook_entry)
-				goto next_hook;
-			goto okfn;
-		}
+		err = nf_queue(skb, &entry->state, hooks, i, verdict);
+		if (err == 1)
+			goto next_hook;
 		break;
 	case NF_STOLEN:
 		break;
-- 
cgit v1.2.3


From 1a66a836da630cd70f3639208da549b549ce576b Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Fri, 25 Aug 2017 09:21:28 -0700
Subject: gre: add collect_md mode to ERSPAN tunnel

Similar to gre, vxlan, geneve, ipip tunnels, allow ERSPAN tunnels to
operate in 'collect metadata' mode.  bpf_skb_[gs]et_tunnel_key() helpers
can make use of it right away.  OVS can use it as well in the future.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h |   4 +-
 net/ipv4/ip_gre.c        | 102 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 101 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 625c29329372..992652856fe8 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -154,8 +154,10 @@ struct ip_tunnel {
 #define TUNNEL_GENEVE_OPT	__cpu_to_be16(0x0800)
 #define TUNNEL_VXLAN_OPT	__cpu_to_be16(0x1000)
 #define TUNNEL_NOCACHE		__cpu_to_be16(0x2000)
+#define TUNNEL_ERSPAN_OPT	__cpu_to_be16(0x4000)
 
-#define TUNNEL_OPTIONS_PRESENT	(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
+#define TUNNEL_OPTIONS_PRESENT \
+		(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
 
 struct tnl_ptk_info {
 	__be16 flags;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 453b7925b940..0162fb955b33 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -113,6 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 static int ipgre_tunnel_init(struct net_device *dev);
+static void erspan_build_header(struct sk_buff *skb,
+				__be32 id, u32 index, bool truncate);
 
 static unsigned int ipgre_net_id __read_mostly;
 static unsigned int gre_tap_net_id __read_mostly;
@@ -287,7 +289,33 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 					   false, false) < 0)
 			goto drop;
 
-		tunnel->index = ntohl(index);
+		if (tunnel->collect_md) {
+			struct ip_tunnel_info *info;
+			struct erspan_metadata *md;
+			__be64 tun_id;
+			__be16 flags;
+
+			tpi->flags |= TUNNEL_KEY;
+			flags = tpi->flags;
+			tun_id = key32_to_tunnel_id(tpi->key);
+
+			tun_dst = ip_tun_rx_dst(skb, flags,
+						tun_id, sizeof(*md));
+			if (!tun_dst)
+				return PACKET_REJECT;
+
+			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+			if (!md)
+				return PACKET_REJECT;
+
+			md->index = index;
+			info = &tun_dst->u.tun_info;
+			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+			info->options_len = sizeof(*md);
+		} else {
+			tunnel->index = ntohl(index);
+		}
+
 		skb_reset_mac_header(skb);
 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 		return PACKET_RCVD;
@@ -523,6 +551,64 @@ err_free_skb:
 	dev->stats.tx_dropped++;
 }
 
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+			   __be16 proto)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct ip_tunnel_info *tun_info;
+	const struct ip_tunnel_key *key;
+	struct erspan_metadata *md;
+	struct rtable *rt = NULL;
+	bool truncate = false;
+	struct flowi4 fl;
+	int tunnel_hlen;
+	__be16 df;
+
+	tun_info = skb_tunnel_info(skb);
+	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+		     ip_tunnel_info_af(tun_info) != AF_INET))
+		goto err_free_skb;
+
+	key = &tun_info->key;
+
+	/* ERSPAN has fixed 8 byte GRE header */
+	tunnel_hlen = 8 + sizeof(struct erspanhdr);
+
+	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+	if (!rt)
+		return;
+
+	if (gre_handle_offloads(skb, false))
+		goto err_free_rt;
+
+	if (skb->len > dev->mtu) {
+		pskb_trim(skb, dev->mtu);
+		truncate = true;
+	}
+
+	md = ip_tunnel_info_opts(tun_info);
+	if (!md)
+		goto err_free_rt;
+
+	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+			    ntohl(md->index), truncate);
+
+	gre_build_header(skb, 8, TUNNEL_SEQ,
+			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
+
+	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+
+	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+		      key->tos, key->ttl, df, false);
+	return;
+
+err_free_rt:
+	ip_rt_put(rt);
+err_free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+}
+
 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -636,6 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	bool truncate = false;
 
+	if (tunnel->collect_md) {
+		erspan_fb_xmit(skb, dev, skb->protocol);
+		return NETDEV_TX_OK;
+	}
+
 	if (gre_handle_offloads(skb, false))
 		goto free_skb;
 
@@ -998,9 +1089,12 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
 		return ret;
 
 	/* ERSPAN should only have GRE sequence and key flag */
-	flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
-	flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
-	if (flags != (GRE_SEQ | GRE_KEY))
+	if (data[IFLA_GRE_OFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	if (data[IFLA_GRE_IFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (!data[IFLA_GRE_COLLECT_METADATA] &&
+	    flags != (GRE_SEQ | GRE_KEY))
 		return -EINVAL;
 
 	/* ERSPAN Session ID only has 10-bit. Since we reuse
-- 
cgit v1.2.3


From 5bf916ee0ab638c86edeaf4caeeade9ddf44d95d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 27 Aug 2017 17:03:33 +0200
Subject: irda: move include/net/irda into staging subdirectory

And finally, move the irda include files into
drivers/staging/irda/include/net/irda.  Yes, it's a long path, but it
makes it easy for us to just add a Makefile directory path addition and
all of the net and drivers code "just works".

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/irda/drivers/Makefile              |   2 +
 drivers/staging/irda/include/net/irda/af_irda.h    |  87 ++++++
 drivers/staging/irda/include/net/irda/crc.h        |  29 ++
 drivers/staging/irda/include/net/irda/discovery.h  |  95 +++++++
 .../staging/irda/include/net/irda/ircomm_core.h    | 106 +++++++
 .../staging/irda/include/net/irda/ircomm_event.h   |  83 ++++++
 drivers/staging/irda/include/net/irda/ircomm_lmp.h |  36 +++
 .../staging/irda/include/net/irda/ircomm_param.h   | 147 ++++++++++
 drivers/staging/irda/include/net/irda/ircomm_ttp.h |  37 +++
 drivers/staging/irda/include/net/irda/ircomm_tty.h | 121 ++++++++
 .../irda/include/net/irda/ircomm_tty_attach.h      |  92 ++++++
 drivers/staging/irda/include/net/irda/irda.h       | 115 ++++++++
 .../staging/irda/include/net/irda/irda_device.h    | 285 +++++++++++++++++++
 drivers/staging/irda/include/net/irda/iriap.h      | 108 +++++++
 .../staging/irda/include/net/irda/iriap_event.h    |  85 ++++++
 .../staging/irda/include/net/irda/irias_object.h   | 108 +++++++
 .../staging/irda/include/net/irda/irlan_client.h   |  42 +++
 .../staging/irda/include/net/irda/irlan_common.h   | 230 +++++++++++++++
 drivers/staging/irda/include/net/irda/irlan_eth.h  |  32 +++
 .../staging/irda/include/net/irda/irlan_event.h    |  81 ++++++
 .../staging/irda/include/net/irda/irlan_filter.h   |  35 +++
 .../staging/irda/include/net/irda/irlan_provider.h |  52 ++++
 drivers/staging/irda/include/net/irda/irlap.h      | 311 +++++++++++++++++++++
 .../staging/irda/include/net/irda/irlap_event.h    | 129 +++++++++
 .../staging/irda/include/net/irda/irlap_frame.h    | 167 +++++++++++
 drivers/staging/irda/include/net/irda/irlmp.h      | 295 +++++++++++++++++++
 .../staging/irda/include/net/irda/irlmp_event.h    |  98 +++++++
 .../staging/irda/include/net/irda/irlmp_frame.h    |  62 ++++
 drivers/staging/irda/include/net/irda/irmod.h      | 109 ++++++++
 drivers/staging/irda/include/net/irda/irqueue.h    |  96 +++++++
 drivers/staging/irda/include/net/irda/irttp.h      | 210 ++++++++++++++
 drivers/staging/irda/include/net/irda/parameters.h | 100 +++++++
 drivers/staging/irda/include/net/irda/qos.h        | 101 +++++++
 drivers/staging/irda/include/net/irda/timer.h      | 105 +++++++
 drivers/staging/irda/include/net/irda/wrapper.h    |  58 ++++
 drivers/staging/irda/net/Makefile                  |   2 +
 include/net/irda/af_irda.h                         |  87 ------
 include/net/irda/crc.h                             |  29 --
 include/net/irda/discovery.h                       |  95 -------
 include/net/irda/ircomm_core.h                     | 106 -------
 include/net/irda/ircomm_event.h                    |  83 ------
 include/net/irda/ircomm_lmp.h                      |  36 ---
 include/net/irda/ircomm_param.h                    | 147 ----------
 include/net/irda/ircomm_ttp.h                      |  37 ---
 include/net/irda/ircomm_tty.h                      | 121 --------
 include/net/irda/ircomm_tty_attach.h               |  92 ------
 include/net/irda/irda.h                            | 115 --------
 include/net/irda/irda_device.h                     | 285 -------------------
 include/net/irda/iriap.h                           | 108 -------
 include/net/irda/iriap_event.h                     |  85 ------
 include/net/irda/irias_object.h                    | 108 -------
 include/net/irda/irlan_client.h                    |  42 ---
 include/net/irda/irlan_common.h                    | 230 ---------------
 include/net/irda/irlan_eth.h                       |  32 ---
 include/net/irda/irlan_event.h                     |  81 ------
 include/net/irda/irlan_filter.h                    |  35 ---
 include/net/irda/irlan_provider.h                  |  52 ----
 include/net/irda/irlap.h                           | 311 ---------------------
 include/net/irda/irlap_event.h                     | 129 ---------
 include/net/irda/irlap_frame.h                     | 167 -----------
 include/net/irda/irlmp.h                           | 295 -------------------
 include/net/irda/irlmp_event.h                     |  98 -------
 include/net/irda/irlmp_frame.h                     |  62 ----
 include/net/irda/irmod.h                           | 109 --------
 include/net/irda/irqueue.h                         |  96 -------
 include/net/irda/irttp.h                           | 210 --------------
 include/net/irda/parameters.h                      | 100 -------
 include/net/irda/qos.h                             | 101 -------
 include/net/irda/timer.h                           | 105 -------
 include/net/irda/wrapper.h                         |  58 ----
 70 files changed, 3851 insertions(+), 3847 deletions(-)
 create mode 100644 drivers/staging/irda/include/net/irda/af_irda.h
 create mode 100644 drivers/staging/irda/include/net/irda/crc.h
 create mode 100644 drivers/staging/irda/include/net/irda/discovery.h
 create mode 100644 drivers/staging/irda/include/net/irda/ircomm_core.h
 create mode 100644 drivers/staging/irda/include/net/irda/ircomm_event.h
 create mode 100644 drivers/staging/irda/include/net/irda/ircomm_lmp.h
 create mode 100644 drivers/staging/irda/include/net/irda/ircomm_param.h
 create mode 100644 drivers/staging/irda/include/net/irda/ircomm_ttp.h
 create mode 100644 drivers/staging/irda/include/net/irda/ircomm_tty.h
 create mode 100644 drivers/staging/irda/include/net/irda/ircomm_tty_attach.h
 create mode 100644 drivers/staging/irda/include/net/irda/irda.h
 create mode 100644 drivers/staging/irda/include/net/irda/irda_device.h
 create mode 100644 drivers/staging/irda/include/net/irda/iriap.h
 create mode 100644 drivers/staging/irda/include/net/irda/iriap_event.h
 create mode 100644 drivers/staging/irda/include/net/irda/irias_object.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlan_client.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlan_common.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlan_eth.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlan_event.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlan_filter.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlan_provider.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlap.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlap_event.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlap_frame.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlmp.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlmp_event.h
 create mode 100644 drivers/staging/irda/include/net/irda/irlmp_frame.h
 create mode 100644 drivers/staging/irda/include/net/irda/irmod.h
 create mode 100644 drivers/staging/irda/include/net/irda/irqueue.h
 create mode 100644 drivers/staging/irda/include/net/irda/irttp.h
 create mode 100644 drivers/staging/irda/include/net/irda/parameters.h
 create mode 100644 drivers/staging/irda/include/net/irda/qos.h
 create mode 100644 drivers/staging/irda/include/net/irda/timer.h
 create mode 100644 drivers/staging/irda/include/net/irda/wrapper.h
 delete mode 100644 include/net/irda/af_irda.h
 delete mode 100644 include/net/irda/crc.h
 delete mode 100644 include/net/irda/discovery.h
 delete mode 100644 include/net/irda/ircomm_core.h
 delete mode 100644 include/net/irda/ircomm_event.h
 delete mode 100644 include/net/irda/ircomm_lmp.h
 delete mode 100644 include/net/irda/ircomm_param.h
 delete mode 100644 include/net/irda/ircomm_ttp.h
 delete mode 100644 include/net/irda/ircomm_tty.h
 delete mode 100644 include/net/irda/ircomm_tty_attach.h
 delete mode 100644 include/net/irda/irda.h
 delete mode 100644 include/net/irda/irda_device.h
 delete mode 100644 include/net/irda/iriap.h
 delete mode 100644 include/net/irda/iriap_event.h
 delete mode 100644 include/net/irda/irias_object.h
 delete mode 100644 include/net/irda/irlan_client.h
 delete mode 100644 include/net/irda/irlan_common.h
 delete mode 100644 include/net/irda/irlan_eth.h
 delete mode 100644 include/net/irda/irlan_event.h
 delete mode 100644 include/net/irda/irlan_filter.h
 delete mode 100644 include/net/irda/irlan_provider.h
 delete mode 100644 include/net/irda/irlap.h
 delete mode 100644 include/net/irda/irlap_event.h
 delete mode 100644 include/net/irda/irlap_frame.h
 delete mode 100644 include/net/irda/irlmp.h
 delete mode 100644 include/net/irda/irlmp_event.h
 delete mode 100644 include/net/irda/irlmp_frame.h
 delete mode 100644 include/net/irda/irmod.h
 delete mode 100644 include/net/irda/irqueue.h
 delete mode 100644 include/net/irda/irttp.h
 delete mode 100644 include/net/irda/parameters.h
 delete mode 100644 include/net/irda/qos.h
 delete mode 100644 include/net/irda/timer.h
 delete mode 100644 include/net/irda/wrapper.h

(limited to 'include/net')

diff --git a/drivers/staging/irda/drivers/Makefile b/drivers/staging/irda/drivers/Makefile
index 4c344433dae5..e2901b135528 100644
--- a/drivers/staging/irda/drivers/Makefile
+++ b/drivers/staging/irda/drivers/Makefile
@@ -5,6 +5,8 @@
 # Rewritten to use lists instead of if-statements.
 #
 
+subdir-ccflags-y += -I$(srctree)/drivers/staging/irda/include
+
 # FIR drivers
 obj-$(CONFIG_USB_IRDA)		+= irda-usb.o
 obj-$(CONFIG_SIGMATEL_FIR)	+= stir4200.o
diff --git a/drivers/staging/irda/include/net/irda/af_irda.h b/drivers/staging/irda/include/net/irda/af_irda.h
new file mode 100644
index 000000000000..0df574931522
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/af_irda.h
@@ -0,0 +1,87 @@
+/*********************************************************************
+ *                
+ * Filename:      af_irda.h
+ * Version:       1.0
+ * Description:   IrDA sockets declarations
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Dec  9 21:13:12 1997
+ * Modified at:   Fri Jan 28 13:16:32 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#ifndef AF_IRDA_H
+#define AF_IRDA_H
+
+#include <linux/irda.h>
+#include <net/irda/irda.h>
+#include <net/irda/iriap.h>		/* struct iriap_cb */
+#include <net/irda/irias_object.h>	/* struct ias_value */
+#include <net/irda/irlmp.h>		/* struct lsap_cb */
+#include <net/irda/irttp.h>		/* struct tsap_cb */
+#include <net/irda/discovery.h>		/* struct discovery_t */
+#include <net/sock.h>
+
+/* IrDA Socket */
+struct irda_sock {
+	/* struct sock has to be the first member of irda_sock */
+	struct sock sk;
+	__u32 saddr;          /* my local address */
+	__u32 daddr;          /* peer address */
+
+	struct lsap_cb *lsap; /* LSAP used by Ultra */
+	__u8  pid;            /* Protocol IP (PID) used by Ultra */
+
+	struct tsap_cb *tsap; /* TSAP used by this connection */
+	__u8 dtsap_sel;       /* remote TSAP address */
+	__u8 stsap_sel;       /* local TSAP address */
+	
+	__u32 max_sdu_size_rx;
+	__u32 max_sdu_size_tx;
+	__u32 max_data_size;
+	__u8  max_header_size;
+	struct qos_info qos_tx;
+
+	__u16_host_order mask;           /* Hint bits mask */
+	__u16_host_order hints;          /* Hint bits */
+
+	void *ckey;           /* IrLMP client handle */
+	void *skey;           /* IrLMP service handle */
+
+	struct ias_object *ias_obj;   /* Our service name + lsap in IAS */
+	struct iriap_cb *iriap;	      /* Used to query remote IAS */
+	struct ias_value *ias_result; /* Result of remote IAS query */
+
+	hashbin_t *cachelog;		/* Result of discovery query */
+	__u32 cachedaddr;	/* Result of selective discovery query */
+
+	int nslots;           /* Number of slots to use for discovery */
+
+	int errno;            /* status of the IAS query */
+
+	wait_queue_head_t query_wait;	/* Wait for the answer to a query */
+	struct timer_list watchdog;	/* Timeout for discovery */
+
+	LOCAL_FLOW tx_flow;
+	LOCAL_FLOW rx_flow;
+};
+
+static inline struct irda_sock *irda_sk(struct sock *sk)
+{
+	return (struct irda_sock *)sk;
+}
+
+#endif /* AF_IRDA_H */
diff --git a/drivers/staging/irda/include/net/irda/crc.h b/drivers/staging/irda/include/net/irda/crc.h
new file mode 100644
index 000000000000..f202296df9bb
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/crc.h
@@ -0,0 +1,29 @@
+/*********************************************************************
+ *                
+ * Filename:      crc.h
+ * Version:       
+ * Description:   CRC routines
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Aug  4 20:40:53 1997
+ * Modified at:   Sun May  2 20:25:23 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ ********************************************************************/
+
+#ifndef IRDA_CRC_H
+#define IRDA_CRC_H
+
+#include <linux/types.h>
+#include <linux/crc-ccitt.h>
+
+#define INIT_FCS  0xffff   /* Initial FCS value */
+#define GOOD_FCS  0xf0b8   /* Good final FCS value */
+
+/* Recompute the FCS with one more character appended. */
+#define irda_fcs(fcs, c) crc_ccitt_byte(fcs, c)
+
+/* Recompute the FCS with len bytes appended. */
+#define irda_calc_crc16(fcs, buf, len) crc_ccitt(fcs, buf, len)
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/discovery.h b/drivers/staging/irda/include/net/irda/discovery.h
new file mode 100644
index 000000000000..63ae32530567
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/discovery.h
@@ -0,0 +1,95 @@
+/*********************************************************************
+ *                
+ * Filename:      discovery.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Apr  6 16:53:53 1999
+ * Modified at:   Tue Oct  5 10:05:10 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef DISCOVERY_H
+#define DISCOVERY_H
+
+#include <asm/param.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irqueue.h>		/* irda_queue_t */
+#include <net/irda/irlap_event.h>	/* LAP_REASON */
+
+#define DISCOVERY_EXPIRE_TIMEOUT (2*sysctl_discovery_timeout*HZ)
+#define DISCOVERY_DEFAULT_SLOTS  0
+
+/*
+ *  This type is used by the protocols that transmit 16 bits words in 
+ *  little endian format. A little endian machine stores MSB of word in
+ *  byte[1] and LSB in byte[0]. A big endian machine stores MSB in byte[0] 
+ *  and LSB in byte[1].
+ *
+ * This structure is used in the code for things that are endian neutral
+ * but that fit in a word so that we can manipulate them efficiently.
+ * By endian neutral, I mean things that are really an array of bytes,
+ * and always used as such, for example the hint bits. Jean II
+ */
+typedef union {
+	__u16 word;
+	__u8  byte[2];
+} __u16_host_order;
+
+/* Types of discovery */
+typedef enum {
+	DISCOVERY_LOG,		/* What's in our discovery log */
+	DISCOVERY_ACTIVE,	/* Doing our own discovery on the medium */
+	DISCOVERY_PASSIVE,	/* Peer doing discovery on the medium */
+	EXPIRY_TIMEOUT,		/* Entry expired due to timeout */
+} DISCOVERY_MODE;
+
+#define NICKNAME_MAX_LEN 21
+
+/* Basic discovery information about a peer */
+typedef struct irda_device_info		discinfo_t;	/* linux/irda.h */
+
+/*
+ * The DISCOVERY structure is used for both discovery requests and responses
+ */
+typedef struct discovery_t {
+	irda_queue_t	q;		/* Must be first! */
+
+	discinfo_t	data;		/* Basic discovery information */
+	int		name_len;	/* Length of nickname */
+
+	LAP_REASON	condition;	/* More info about the discovery */
+	int		gen_addr_bit;	/* Need to generate a new device
+					 * address? */
+	int		nslots;		/* Number of slots to use when
+					 * discovering */
+	unsigned long	timestamp;	/* Last time discovered */
+	unsigned long	firststamp;	/* First time discovered */
+} discovery_t;
+
+void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *discovery);
+void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log);
+void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force);
+struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
+						__u16 mask, int old_entries);
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/ircomm_core.h b/drivers/staging/irda/include/net/irda/ircomm_core.h
new file mode 100644
index 000000000000..2a580ce9edad
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/ircomm_core.h
@@ -0,0 +1,106 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_core.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Wed Jun  9 08:58:43 1999
+ * Modified at:   Mon Dec 13 11:52:29 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRCOMM_CORE_H
+#define IRCOMM_CORE_H
+
+#include <net/irda/irda.h>
+#include <net/irda/irqueue.h>
+#include <net/irda/ircomm_event.h>
+
+#define IRCOMM_MAGIC 0x98347298
+#define IRCOMM_HEADER_SIZE 1
+
+struct ircomm_cb;   /* Forward decl. */
+
+/*
+ * A small call-table, so we don't have to check the service-type whenever
+ * we want to do something
+ */
+typedef struct {
+	int (*data_request)(struct ircomm_cb *, struct sk_buff *, int clen);
+	int (*connect_request)(struct ircomm_cb *, struct sk_buff *, 
+			       struct ircomm_info *);
+	int (*connect_response)(struct ircomm_cb *, struct sk_buff *);
+	int (*disconnect_request)(struct ircomm_cb *, struct sk_buff *, 
+				  struct ircomm_info *);	
+} call_t;
+
+struct ircomm_cb {
+	irda_queue_t queue;
+	magic_t magic;
+
+	notify_t notify;
+	call_t   issue;
+
+	int state;
+	int line;            /* Which TTY line we are using */
+
+	struct tsap_cb *tsap;
+	struct lsap_cb *lsap;
+	
+	__u8 dlsap_sel;      /* Destination LSAP/TSAP selector */
+	__u8 slsap_sel;      /* Source LSAP/TSAP selector */
+
+	__u32 saddr;         /* Source device address (link we are using) */
+	__u32 daddr;         /* Destination device address */
+
+	int max_header_size; /* Header space we must reserve for each frame */
+	int max_data_size;   /* The amount of data we can fill in each frame */
+
+	LOCAL_FLOW flow_status; /* Used by ircomm_lmp */
+	int pkt_count;          /* Number of frames we have sent to IrLAP */
+
+	__u8 service_type;
+};
+
+extern hashbin_t *ircomm;
+
+struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line);
+int ircomm_close(struct ircomm_cb *self);
+
+int ircomm_data_request(struct ircomm_cb *self, struct sk_buff *skb);
+void ircomm_data_indication(struct ircomm_cb *self, struct sk_buff *skb);
+void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb);
+int ircomm_control_request(struct ircomm_cb *self, struct sk_buff *skb);
+int ircomm_connect_request(struct ircomm_cb *self, __u8 dlsap_sel, 
+			   __u32 saddr, __u32 daddr, struct sk_buff *skb,
+			   __u8 service_type);
+void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb,
+			       struct ircomm_info *info);
+void ircomm_connect_confirm(struct ircomm_cb *self, struct sk_buff *skb,
+			    struct ircomm_info *info);
+int ircomm_connect_response(struct ircomm_cb *self, struct sk_buff *userdata);
+int ircomm_disconnect_request(struct ircomm_cb *self, struct sk_buff *userdata);
+void ircomm_disconnect_indication(struct ircomm_cb *self, struct sk_buff *skb,
+				  struct ircomm_info *info);
+void ircomm_flow_request(struct ircomm_cb *self, LOCAL_FLOW flow);
+
+#define ircomm_is_connected(self) (self->state == IRCOMM_CONN)
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/ircomm_event.h b/drivers/staging/irda/include/net/irda/ircomm_event.h
new file mode 100644
index 000000000000..5bbc32998d57
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/ircomm_event.h
@@ -0,0 +1,83 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_event.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Jun  6 23:51:13 1999
+ * Modified at:   Thu Jun 10 08:36:25 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRCOMM_EVENT_H
+#define IRCOMM_EVENT_H
+
+#include <net/irda/irmod.h>
+
+typedef enum {
+        IRCOMM_IDLE,
+        IRCOMM_WAITI,
+        IRCOMM_WAITR,
+        IRCOMM_CONN,
+} IRCOMM_STATE;
+
+/* IrCOMM Events */
+typedef enum {
+        IRCOMM_CONNECT_REQUEST,
+        IRCOMM_CONNECT_RESPONSE,
+        IRCOMM_TTP_CONNECT_INDICATION,
+	IRCOMM_LMP_CONNECT_INDICATION,
+        IRCOMM_TTP_CONNECT_CONFIRM,
+	IRCOMM_LMP_CONNECT_CONFIRM,
+
+        IRCOMM_LMP_DISCONNECT_INDICATION,
+	IRCOMM_TTP_DISCONNECT_INDICATION,
+        IRCOMM_DISCONNECT_REQUEST,
+
+        IRCOMM_TTP_DATA_INDICATION,
+	IRCOMM_LMP_DATA_INDICATION,
+        IRCOMM_DATA_REQUEST,
+        IRCOMM_CONTROL_REQUEST,
+        IRCOMM_CONTROL_INDICATION,
+} IRCOMM_EVENT;
+
+/*
+ * Used for passing information through the state-machine
+ */
+struct ircomm_info {
+        __u32     saddr;               /* Source device address */
+        __u32     daddr;               /* Destination device address */
+        __u8      dlsap_sel;
+        LM_REASON reason;              /* Reason for disconnect */
+	__u32     max_data_size;
+	__u32     max_header_size;
+
+	struct qos_info *qos;
+};
+
+extern const char *const ircomm_state[];
+
+struct ircomm_cb;   /* Forward decl. */
+
+int ircomm_do_event(struct ircomm_cb *self, IRCOMM_EVENT event,
+		    struct sk_buff *skb, struct ircomm_info *info);
+void ircomm_next_state(struct ircomm_cb *self, IRCOMM_STATE state);
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/ircomm_lmp.h b/drivers/staging/irda/include/net/irda/ircomm_lmp.h
new file mode 100644
index 000000000000..5042a5021a04
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/ircomm_lmp.h
@@ -0,0 +1,36 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_lmp.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Wed Jun  9 10:06:07 1999
+ * Modified at:   Fri Aug 13 07:32:32 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRCOMM_LMP_H
+#define IRCOMM_LMP_H
+
+#include <net/irda/ircomm_core.h>
+
+int ircomm_open_lsap(struct ircomm_cb *self);
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/ircomm_param.h b/drivers/staging/irda/include/net/irda/ircomm_param.h
new file mode 100644
index 000000000000..1f67432321c4
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/ircomm_param.h
@@ -0,0 +1,147 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_param.h
+ * Version:       1.0
+ * Description:   Parameter handling for the IrCOMM protocol
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Jun  7 08:47:28 1999
+ * Modified at:   Wed Aug 25 13:46:33 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRCOMM_PARAMS_H
+#define IRCOMM_PARAMS_H
+
+#include <net/irda/parameters.h>
+
+/* Parameters common to all service types */
+#define IRCOMM_SERVICE_TYPE     0x00
+#define IRCOMM_PORT_TYPE        0x01 /* Only used in LM-IAS */
+#define IRCOMM_PORT_NAME        0x02 /* Only used in LM-IAS */
+
+/* Parameters for both 3 wire and 9 wire */
+#define IRCOMM_DATA_RATE        0x10
+#define IRCOMM_DATA_FORMAT      0x11
+#define IRCOMM_FLOW_CONTROL     0x12
+#define IRCOMM_XON_XOFF         0x13
+#define IRCOMM_ENQ_ACK          0x14
+#define IRCOMM_LINE_STATUS      0x15
+#define IRCOMM_BREAK            0x16
+
+/* Parameters for 9 wire */
+#define IRCOMM_DTE              0x20
+#define IRCOMM_DCE              0x21
+#define IRCOMM_POLL             0x22
+
+/* Service type (details) */
+#define IRCOMM_3_WIRE_RAW       0x01
+#define IRCOMM_3_WIRE           0x02
+#define IRCOMM_9_WIRE           0x04
+#define IRCOMM_CENTRONICS       0x08
+
+/* Port type (details) */
+#define IRCOMM_SERIAL           0x00
+#define IRCOMM_PARALLEL         0x01
+
+/* Data format (details) */
+#define IRCOMM_WSIZE_5          0x00
+#define IRCOMM_WSIZE_6          0x01
+#define IRCOMM_WSIZE_7          0x02
+#define IRCOMM_WSIZE_8          0x03
+
+#define IRCOMM_1_STOP_BIT       0x00
+#define IRCOMM_2_STOP_BIT       0x04 /* 1.5 if char len 5 */
+
+#define IRCOMM_PARITY_DISABLE   0x00
+#define IRCOMM_PARITY_ENABLE    0x08
+
+#define IRCOMM_PARITY_ODD       0x00
+#define IRCOMM_PARITY_EVEN      0x10
+#define IRCOMM_PARITY_MARK      0x20
+#define IRCOMM_PARITY_SPACE     0x30
+
+/* Flow control */
+#define IRCOMM_XON_XOFF_IN      0x01
+#define IRCOMM_XON_XOFF_OUT     0x02
+#define IRCOMM_RTS_CTS_IN       0x04
+#define IRCOMM_RTS_CTS_OUT      0x08
+#define IRCOMM_DSR_DTR_IN       0x10
+#define IRCOMM_DSR_DTR_OUT      0x20
+#define IRCOMM_ENQ_ACK_IN       0x40
+#define IRCOMM_ENQ_ACK_OUT      0x80
+
+/* Line status */
+#define IRCOMM_OVERRUN_ERROR    0x02
+#define IRCOMM_PARITY_ERROR     0x04
+#define IRCOMM_FRAMING_ERROR    0x08
+
+/* DTE (Data terminal equipment) line settings */
+#define IRCOMM_DELTA_DTR        0x01
+#define IRCOMM_DELTA_RTS        0x02
+#define IRCOMM_DTR              0x04
+#define IRCOMM_RTS              0x08
+
+/* DCE (Data communications equipment) line settings */
+#define IRCOMM_DELTA_CTS        0x01  /* Clear to send has changed */
+#define IRCOMM_DELTA_DSR        0x02  /* Data set ready has changed */
+#define IRCOMM_DELTA_RI         0x04  /* Ring indicator has changed */
+#define IRCOMM_DELTA_CD         0x08  /* Carrier detect has changed */
+#define IRCOMM_CTS              0x10  /* Clear to send is high */
+#define IRCOMM_DSR              0x20  /* Data set ready is high */
+#define IRCOMM_RI               0x40  /* Ring indicator is high */
+#define IRCOMM_CD               0x80  /* Carrier detect is high */
+#define IRCOMM_DCE_DELTA_ANY    0x0f
+
+/*
+ * Parameter state
+ */
+struct ircomm_params {
+	/* General control params */
+	__u8  service_type;
+	__u8  port_type;
+	char  port_name[32];
+
+	/* Control params for 3- and 9-wire service type */
+	__u32 data_rate;         /* Data rate in bps */
+	__u8  data_format;
+	__u8  flow_control;
+	char  xonxoff[2];
+	char  enqack[2];
+	__u8  line_status;
+	__u8  _break;
+
+	__u8  null_modem;
+
+	/* Control params for 9-wire service type */
+	__u8 dte;
+	__u8 dce;
+	__u8 poll;
+
+	/* Control params for Centronics service type */
+};
+
+struct ircomm_tty_cb; /* Forward decl. */
+
+int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush);
+
+extern pi_param_info_t ircomm_param_info;
+
+#endif /* IRCOMM_PARAMS_H */
+
diff --git a/drivers/staging/irda/include/net/irda/ircomm_ttp.h b/drivers/staging/irda/include/net/irda/ircomm_ttp.h
new file mode 100644
index 000000000000..c5627288bca3
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/ircomm_ttp.h
@@ -0,0 +1,37 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_ttp.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Wed Jun  9 10:06:07 1999
+ * Modified at:   Fri Aug 13 07:32:22 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRCOMM_TTP_H
+#define IRCOMM_TTP_H
+
+#include <net/irda/ircomm_core.h>
+
+int  ircomm_open_tsap(struct ircomm_cb *self);
+
+#endif
+
diff --git a/drivers/staging/irda/include/net/irda/ircomm_tty.h b/drivers/staging/irda/include/net/irda/ircomm_tty.h
new file mode 100644
index 000000000000..8d4f588974bc
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/ircomm_tty.h
@@ -0,0 +1,121 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_tty.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Jun  6 23:24:22 1999
+ * Modified at:   Fri Jan 28 13:16:57 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRCOMM_TTY_H
+#define IRCOMM_TTY_H
+
+#include <linux/serial.h>
+#include <linux/termios.h>
+#include <linux/timer.h>
+#include <linux/tty.h>		/* struct tty_struct */
+
+#include <net/irda/irias_object.h>
+#include <net/irda/ircomm_core.h>
+#include <net/irda/ircomm_param.h>
+
+#define IRCOMM_TTY_PORTS 32
+#define IRCOMM_TTY_MAGIC 0x3432
+#define IRCOMM_TTY_MAJOR 161
+#define IRCOMM_TTY_MINOR 0
+
+/* This is used as an initial value to max_header_size before the proper
+ * value is filled in (5 for ttp, 4 for lmp). This allow us to detect
+ * the state of the underlying connection. - Jean II */
+#define IRCOMM_TTY_HDR_UNINITIALISED	16
+/* Same for payload size. See qos.c for the smallest max data size */
+#define IRCOMM_TTY_DATA_UNINITIALISED	(64 - IRCOMM_TTY_HDR_UNINITIALISED)
+
+/*
+ * IrCOMM TTY driver state
+ */
+struct ircomm_tty_cb {
+	irda_queue_t queue;            /* Must be first */
+	struct tty_port port;
+	magic_t magic;
+
+	int state;                /* Connect state */
+
+	struct ircomm_cb *ircomm; /* IrCOMM layer instance */
+
+	struct sk_buff *tx_skb;   /* Transmit buffer */
+	struct sk_buff *ctrl_skb; /* Control data buffer */
+
+	/* Parameters */
+	struct ircomm_params settings;
+
+	__u8 service_type;        /* The service that we support */
+	int client;               /* True if we are a client */
+	LOCAL_FLOW flow;          /* IrTTP flow status */
+
+	int line;
+
+	__u8 dlsap_sel;
+	__u8 slsap_sel;
+
+	__u32 saddr;
+	__u32 daddr;
+
+	__u32 max_data_size;   /* Max data we can transmit in one packet */
+	__u32 max_header_size; /* The amount of header space we must reserve */
+	__u32 tx_data_size;	/* Max data size of current tx_skb */
+
+	struct iriap_cb *iriap; /* Instance used for querying remote IAS */
+	struct ias_object* obj;
+	void *skey;
+	void *ckey;
+
+	struct timer_list watchdog_timer;
+	struct work_struct  tqueue;
+
+	/* Protect concurent access to :
+	 *	o self->ctrl_skb
+	 *	o self->tx_skb
+	 * Maybe other things may gain to be protected as well...
+	 * Jean II */
+	spinlock_t spinlock;
+};
+
+void ircomm_tty_start(struct tty_struct *tty);
+void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self);
+
+int ircomm_tty_tiocmget(struct tty_struct *tty);
+int ircomm_tty_tiocmset(struct tty_struct *tty, unsigned int set,
+			unsigned int clear);
+int ircomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd,
+		     unsigned long arg);
+void ircomm_tty_set_termios(struct tty_struct *tty,
+			    struct ktermios *old_termios);
+
+#endif
+
+
+
+
+
+
+
diff --git a/drivers/staging/irda/include/net/irda/ircomm_tty_attach.h b/drivers/staging/irda/include/net/irda/ircomm_tty_attach.h
new file mode 100644
index 000000000000..20dcbdf258cf
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/ircomm_tty_attach.h
@@ -0,0 +1,92 @@
+/*********************************************************************
+ *                
+ * Filename:      ircomm_tty_attach.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Wed Jun  9 15:55:18 1999
+ * Modified at:   Fri Dec 10 21:04:55 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRCOMM_TTY_ATTACH_H
+#define IRCOMM_TTY_ATTACH_H
+
+#include <net/irda/ircomm_tty.h>
+
+typedef enum {
+        IRCOMM_TTY_IDLE,
+	IRCOMM_TTY_SEARCH,
+        IRCOMM_TTY_QUERY_PARAMETERS,
+	IRCOMM_TTY_QUERY_LSAP_SEL,
+	IRCOMM_TTY_SETUP,
+        IRCOMM_TTY_READY,
+} IRCOMM_TTY_STATE;
+
+/* IrCOMM TTY Events */
+typedef enum {
+	IRCOMM_TTY_ATTACH_CABLE,
+	IRCOMM_TTY_DETACH_CABLE,
+	IRCOMM_TTY_DATA_REQUEST,
+	IRCOMM_TTY_DATA_INDICATION,
+	IRCOMM_TTY_DISCOVERY_REQUEST,
+	IRCOMM_TTY_DISCOVERY_INDICATION,
+	IRCOMM_TTY_CONNECT_CONFIRM,
+	IRCOMM_TTY_CONNECT_INDICATION,
+	IRCOMM_TTY_DISCONNECT_REQUEST,
+	IRCOMM_TTY_DISCONNECT_INDICATION,
+	IRCOMM_TTY_WD_TIMER_EXPIRED,
+	IRCOMM_TTY_GOT_PARAMETERS,
+	IRCOMM_TTY_GOT_LSAPSEL,
+} IRCOMM_TTY_EVENT;
+
+/* Used for passing information through the state-machine */
+struct ircomm_tty_info {
+        __u32     saddr;               /* Source device address */
+        __u32     daddr;               /* Destination device address */
+        __u8      dlsap_sel;
+};
+
+extern const char *const ircomm_state[];
+extern const char *const ircomm_tty_state[];
+
+int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event,
+			struct sk_buff *skb, struct ircomm_tty_info *info);
+
+
+int  ircomm_tty_attach_cable(struct ircomm_tty_cb *self);
+void ircomm_tty_detach_cable(struct ircomm_tty_cb *self);
+void ircomm_tty_connect_confirm(void *instance, void *sap, 
+				struct qos_info *qos, 
+				__u32 max_sdu_size, 
+				__u8 max_header_size, 
+				struct sk_buff *skb);
+void ircomm_tty_disconnect_indication(void *instance, void *sap, 
+				      LM_REASON reason,
+				      struct sk_buff *skb);
+void ircomm_tty_connect_indication(void *instance, void *sap, 
+				   struct qos_info *qos, 
+				   __u32 max_sdu_size,
+				   __u8 max_header_size, 
+				   struct sk_buff *skb);
+int ircomm_tty_send_initial_parameters(struct ircomm_tty_cb *self);
+void ircomm_tty_link_established(struct ircomm_tty_cb *self);
+
+#endif /* IRCOMM_TTY_ATTACH_H */
diff --git a/drivers/staging/irda/include/net/irda/irda.h b/drivers/staging/irda/include/net/irda/irda.h
new file mode 100644
index 000000000000..92c8fb575213
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irda.h
@@ -0,0 +1,115 @@
+/*********************************************************************
+ *                
+ * Filename:      irda.h
+ * Version:       1.0
+ * Description:   IrDA common include file for kernel internal use
+ * Status:        Stable
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Dec  9 21:13:12 1997
+ * Modified at:   Fri Jan 28 13:16:32 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#ifndef NET_IRDA_H
+#define NET_IRDA_H
+
+#include <linux/skbuff.h>		/* struct sk_buff */
+#include <linux/kernel.h>
+#include <linux/if.h>			/* sa_family_t in <linux/irda.h> */
+#include <linux/irda.h>
+
+typedef __u32 magic_t;
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE 
+#define FALSE 0
+#endif
+
+/* Hack to do small backoff when setting media busy in IrLAP */
+#ifndef SMALL
+#define SMALL 5
+#endif
+
+#ifndef IRDA_MIN /* Lets not mix this MIN with other header files */
+#define IRDA_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef IRDA_ALIGN
+#  define IRDA_ALIGN __attribute__((aligned))
+#endif
+
+#ifdef CONFIG_IRDA_DEBUG
+#define IRDA_ASSERT(expr, func) \
+do { if(!(expr)) { \
+	printk( "Assertion failed! %s:%s:%d %s\n", \
+		__FILE__,__func__,__LINE__,(#expr) ); \
+	func } } while (0)
+#define IRDA_ASSERT_LABEL(label)	label
+#else
+#define IRDA_ASSERT(expr, func) do { (void)(expr); } while (0)
+#define IRDA_ASSERT_LABEL(label)
+#endif /* CONFIG_IRDA_DEBUG */
+
+/*
+ *  Magic numbers used by Linux-IrDA. Random numbers which must be unique to 
+ *  give the best protection
+ */
+
+#define IRTTY_MAGIC        0x2357
+#define LAP_MAGIC          0x1357
+#define LMP_MAGIC          0x4321
+#define LMP_LSAP_MAGIC     0x69333
+#define LMP_LAP_MAGIC      0x3432
+#define IRDA_DEVICE_MAGIC  0x63454
+#define IAS_MAGIC          0x007
+#define TTP_MAGIC          0x241169
+#define TTP_TSAP_MAGIC     0x4345
+#define IROBEX_MAGIC       0x341324
+#define HB_MAGIC           0x64534
+#define IRLAN_MAGIC        0x754
+#define IAS_OBJECT_MAGIC   0x34234
+#define IAS_ATTRIB_MAGIC   0x45232
+#define IRDA_TASK_MAGIC    0x38423
+
+#define IAS_DEVICE_ID 0x0000 /* Defined by IrDA, IrLMP section 4.1 (page 68) */
+#define IAS_PNP_ID    0xd342
+#define IAS_OBEX_ID   0x34323
+#define IAS_IRLAN_ID  0x34234
+#define IAS_IRCOMM_ID 0x2343
+#define IAS_IRLPT_ID  0x9876
+
+struct net_device;
+struct packet_type;
+
+void irda_proc_register(void);
+void irda_proc_unregister(void);
+
+int irda_sysctl_register(void);
+void irda_sysctl_unregister(void);
+
+int irsock_init(void);
+void irsock_cleanup(void);
+
+int irda_nl_register(void);
+void irda_nl_unregister(void);
+
+int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
+		     struct packet_type *ptype, struct net_device *orig_dev);
+
+#endif /* NET_IRDA_H */
diff --git a/drivers/staging/irda/include/net/irda/irda_device.h b/drivers/staging/irda/include/net/irda/irda_device.h
new file mode 100644
index 000000000000..664bf8178412
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irda_device.h
@@ -0,0 +1,285 @@
+/*********************************************************************
+ *                
+ * Filename:      irda_device.h
+ * Version:       0.9
+ * Description:   Contains various declarations used by the drivers
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Apr 14 12:41:42 1998
+ * Modified at:   Mon Mar 20 09:08:57 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 1998 Thomas Davis, <ratbert@radiks.net>,
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+/*
+ * This header contains all the IrDA definitions a driver really
+ * needs, and therefore the driver should not need to include
+ * any other IrDA headers - Jean II
+ */
+
+#ifndef IRDA_DEVICE_H
+#define IRDA_DEVICE_H
+
+#include <linux/tty.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>		/* struct sk_buff */
+#include <linux/irda.h>
+#include <linux/types.h>
+
+#include <net/pkt_sched.h>
+#include <net/irda/irda.h>
+#include <net/irda/qos.h>		/* struct qos_info */
+#include <net/irda/irqueue.h>		/* irda_queue_t */
+
+/* A few forward declarations (to make compiler happy) */
+struct irlap_cb;
+
+/* Some non-standard interface flags (should not conflict with any in if.h) */
+#define IFF_SIR 	0x0001 /* Supports SIR speeds */
+#define IFF_MIR 	0x0002 /* Supports MIR speeds */
+#define IFF_FIR 	0x0004 /* Supports FIR speeds */
+#define IFF_VFIR        0x0008 /* Supports VFIR speeds */
+#define IFF_PIO   	0x0010 /* Supports PIO transfer of data */
+#define IFF_DMA		0x0020 /* Supports DMA transfer of data */
+#define IFF_SHM         0x0040 /* Supports shared memory data transfers */
+#define IFF_DONGLE      0x0080 /* Interface has a dongle attached */
+#define IFF_AIR         0x0100 /* Supports Advanced IR (AIR) standards */
+
+#define IO_XMIT 0x01
+#define IO_RECV 0x02
+
+typedef enum {
+	IRDA_IRLAP, /* IrDA mode, and deliver to IrLAP */
+	IRDA_RAW,   /* IrDA mode */
+	SHARP_ASK,
+	TV_REMOTE,  /* Also known as Consumer Electronics IR */
+} INFRARED_MODE;
+
+typedef enum {
+	IRDA_TASK_INIT,        /* All tasks are initialized with this state */
+	IRDA_TASK_DONE,        /* Signals that the task is finished */
+	IRDA_TASK_WAIT,
+	IRDA_TASK_WAIT1,
+	IRDA_TASK_WAIT2,
+	IRDA_TASK_WAIT3,
+	IRDA_TASK_CHILD_INIT,  /* Initializing child task */
+	IRDA_TASK_CHILD_WAIT,  /* Waiting for child task to finish */
+	IRDA_TASK_CHILD_DONE   /* Child task is finished */
+} IRDA_TASK_STATE;
+
+struct irda_task;
+typedef int (*IRDA_TASK_CALLBACK) (struct irda_task *task);
+
+struct irda_task {
+	irda_queue_t q;
+	magic_t magic;
+
+	IRDA_TASK_STATE state;
+	IRDA_TASK_CALLBACK function;
+	IRDA_TASK_CALLBACK finished;
+
+	struct irda_task *parent;
+	struct timer_list timer;
+
+	void *instance; /* Instance being called */
+	void *param;    /* Parameter to be used by instance */
+};
+
+/* Dongle info */
+struct dongle_reg;
+typedef struct {
+	struct dongle_reg *issue;     /* Registration info */
+	struct net_device *dev;           /* Device we are attached to */
+	struct irda_task *speed_task; /* Task handling speed change */
+	struct irda_task *reset_task; /* Task handling reset */
+	__u32 speed;                  /* Current speed */
+
+	/* Callbacks to the IrDA device driver */
+	int (*set_mode)(struct net_device *, int mode);
+	int (*read)(struct net_device *dev, __u8 *buf, int len);
+	int (*write)(struct net_device *dev, __u8 *buf, int len);
+	int (*set_dtr_rts)(struct net_device *dev, int dtr, int rts);
+} dongle_t;
+
+/* Dongle registration info */
+struct dongle_reg {
+	irda_queue_t q;         /* Must be first */
+	IRDA_DONGLE type;
+
+	void (*open)(dongle_t *dongle, struct qos_info *qos);
+	void (*close)(dongle_t *dongle);
+	int  (*reset)(struct irda_task *task);
+	int  (*change_speed)(struct irda_task *task);
+	struct module *owner;
+};
+
+/* 
+ * Per-packet information we need to hide inside sk_buff 
+ * (must not exceed 48 bytes, check with struct sk_buff)
+ * The default_qdisc_pad field is a temporary hack.
+ */
+struct irda_skb_cb {
+	unsigned int default_qdisc_pad;
+	magic_t magic;       /* Be sure that we can trust the information */
+	__u32   next_speed;  /* The Speed to be set *after* this frame */
+	__u16   mtt;         /* Minimum turn around time */
+	__u16   xbofs;       /* Number of xbofs required, used by SIR mode */
+	__u16   next_xbofs;  /* Number of xbofs required *after* this frame */
+	void    *context;    /* May be used by drivers */
+	void    (*destructor)(struct sk_buff *skb); /* Used for flow control */
+	__u16   xbofs_delay; /* Number of xbofs used for generating the mtt */
+	__u8    line;        /* Used by IrCOMM in IrLPT mode */
+};
+
+/* Chip specific info */
+typedef struct {
+	int cfg_base;         /* Config register IO base */
+        int sir_base;         /* SIR IO base */
+	int fir_base;         /* FIR IO base */
+	int mem_base;         /* Shared memory base */
+        int sir_ext;          /* Length of SIR iobase */
+	int fir_ext;          /* Length of FIR iobase */
+        int irq, irq2;        /* Interrupts used */
+        int dma, dma2;        /* DMA channel(s) used */
+        int fifo_size;        /* FIFO size */
+        int irqflags;         /* interrupt flags (ie, IRQF_SHARED) */
+	int direction;        /* Link direction, used by some FIR drivers */
+	int enabled;          /* Powered on? */
+	int suspended;        /* Suspended by APM */
+	__u32 speed;          /* Currently used speed */
+	__u32 new_speed;      /* Speed we must change to when Tx is finished */
+	int dongle_id;        /* Dongle or transceiver currently used */
+} chipio_t;
+
+/* IO buffer specific info (inspired by struct sk_buff) */
+typedef struct {
+	int state;            /* Receiving state (transmit state not used) */
+	int in_frame;         /* True if receiving frame */
+
+	__u8 *head;	      /* start of buffer */
+	__u8 *data;	      /* start of data in buffer */
+
+	int len;	      /* current length of data */
+	int truesize;	      /* total allocated size of buffer */
+	__u16 fcs;
+
+	struct sk_buff *skb;	/* ZeroCopy Rx in async_unwrap_char() */
+} iobuff_t;
+
+/* Maximum SIR frame (skb) that we expect to receive *unwrapped*.
+ * Max LAP MTU (I field) is 2048 bytes max (IrLAP 1.1, chapt 6.6.5, p40).
+ * Max LAP header is 2 bytes (for now).
+ * Max CRC is 2 bytes at SIR, 4 bytes at FIR. 
+ * Need 1 byte for skb_reserve() to align IP header for IrLAN.
+ * Add a few extra bytes just to be safe (buffer is power of two anyway)
+ * Jean II */
+#define IRDA_SKB_MAX_MTU	2064
+/* Maximum SIR frame that we expect to send, wrapped (i.e. with XBOFS
+ * and escaped characters on top of above). */
+#define IRDA_SIR_MAX_FRAME	4269
+
+/* The SIR unwrapper async_unwrap_char() will use a Rx-copy-break mechanism
+ * when using the optional ZeroCopy Rx, where only small frames are memcpy
+ * to a smaller skb to save memory. This is the threshold under which copy
+ * will happen (and over which it won't happen).
+ * Some FIR drivers may use this #define as well...
+ * This is the same value as various Ethernet drivers. - Jean II */
+#define IRDA_RX_COPY_THRESHOLD  256
+
+/* Function prototypes */
+int  irda_device_init(void);
+void irda_device_cleanup(void);
+
+/* IrLAP entry points used by the drivers.
+ * We declare them here to avoid the driver pulling a whole bunch stack
+ * headers they don't really need - Jean II */
+struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
+			    const char *hw_name);
+void irlap_close(struct irlap_cb *self);
+
+/* Interface to be uses by IrLAP */
+void irda_device_set_media_busy(struct net_device *dev, int status);
+int  irda_device_is_media_busy(struct net_device *dev);
+int  irda_device_is_receiving(struct net_device *dev);
+
+/* Interface for internal use */
+static inline int irda_device_txqueue_empty(const struct net_device *dev)
+{
+	return qdisc_all_tx_empty(dev);
+}
+int  irda_device_set_raw_mode(struct net_device* self, int status);
+struct net_device *alloc_irdadev(int sizeof_priv);
+
+void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode);
+
+/*
+ * Function irda_get_mtt (skb)
+ *
+ *    Utility function for getting the minimum turnaround time out of 
+ *    the skb, where it has been hidden in the cb field.
+ */
+static inline __u16 irda_get_mtt(const struct sk_buff *skb)
+{
+	const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb;
+	return (cb->magic == LAP_MAGIC) ? cb->mtt : 10000;
+}
+
+/*
+ * Function irda_get_next_speed (skb)
+ *
+ *    Extract the speed that should be set *after* this frame from the skb
+ *
+ * Note : return -1 for user space frames
+ */
+static inline __u32 irda_get_next_speed(const struct sk_buff *skb)
+{
+	const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb;
+	return (cb->magic == LAP_MAGIC) ? cb->next_speed : -1;
+}
+
+/*
+ * Function irda_get_next_xbofs (skb)
+ *
+ *    Extract the xbofs that should be set for this frame from the skb
+ *
+ * Note : default to 10 for user space frames
+ */
+static inline __u16 irda_get_xbofs(const struct sk_buff *skb)
+{
+	const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb;
+	return (cb->magic == LAP_MAGIC) ? cb->xbofs : 10;
+}
+
+/*
+ * Function irda_get_next_xbofs (skb)
+ *
+ *    Extract the xbofs that should be set *after* this frame from the skb
+ *
+ * Note : return -1 for user space frames
+ */
+static inline __u16 irda_get_next_xbofs(const struct sk_buff *skb)
+{
+	const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb;
+	return (cb->magic == LAP_MAGIC) ? cb->next_xbofs : -1;
+}
+#endif /* IRDA_DEVICE_H */
+
+
diff --git a/drivers/staging/irda/include/net/irda/iriap.h b/drivers/staging/irda/include/net/irda/iriap.h
new file mode 100644
index 000000000000..fcc896491a95
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/iriap.h
@@ -0,0 +1,108 @@
+/*********************************************************************
+ *                
+ * Filename:      iriap.h
+ * Version:       0.5
+ * Description:   Information Access Protocol (IAP)
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Thu Aug 21 00:02:07 1997
+ * Modified at:   Sat Dec 25 16:42:09 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRIAP_H
+#define IRIAP_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+
+#include <net/irda/iriap_event.h>
+#include <net/irda/irias_object.h>
+#include <net/irda/irqueue.h>		/* irda_queue_t */
+#include <net/irda/timer.h>		/* struct timer_list */
+
+#define IAP_LST 0x80
+#define IAP_ACK 0x40
+
+#define IAS_SERVER 0
+#define IAS_CLIENT 1
+
+/* IrIAP Op-codes */
+#define GET_INFO_BASE      0x01
+#define GET_OBJECTS        0x02
+#define GET_VALUE          0x03
+#define GET_VALUE_BY_CLASS 0x04
+#define GET_OBJECT_INFO    0x05
+#define GET_ATTRIB_NAMES   0x06
+
+#define IAS_SUCCESS        0
+#define IAS_CLASS_UNKNOWN  1
+#define IAS_ATTRIB_UNKNOWN 2
+#define IAS_DISCONNECT     10
+
+typedef void (*CONFIRM_CALLBACK)(int result, __u16 obj_id, 
+				 struct ias_value *value, void *priv);
+
+struct iriap_cb {
+	irda_queue_t q; /* Must be first */	
+	magic_t magic;  /* Magic cookie */
+
+	int          mode;   /* Client or server */
+
+	__u32        saddr;
+	__u32        daddr;
+	__u8         operation;
+
+	struct sk_buff *request_skb;
+	struct lsap_cb *lsap;
+	__u8 slsap_sel;
+
+	/* Client states */
+	IRIAP_STATE client_state;
+	IRIAP_STATE call_state;
+	
+	/* Server states */
+	IRIAP_STATE server_state;
+	IRIAP_STATE r_connect_state;
+	
+	CONFIRM_CALLBACK confirm;
+	void *priv;                /* Used to identify client */
+
+	__u8 max_header_size;
+	__u32 max_data_size;
+	
+	struct timer_list watchdog_timer;
+};
+
+int  iriap_init(void);
+void iriap_cleanup(void);
+
+struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, 
+			    CONFIRM_CALLBACK callback);
+void iriap_close(struct iriap_cb *self);
+
+int iriap_getvaluebyclass_request(struct iriap_cb *self, 
+				  __u32 saddr, __u32 daddr,
+				  char *name, char *attr);
+void iriap_connect_request(struct iriap_cb *self);
+void iriap_send_ack( struct iriap_cb *self);
+void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb);
+
+void iriap_register_server(void);
+
+#endif
+
+
diff --git a/drivers/staging/irda/include/net/irda/iriap_event.h b/drivers/staging/irda/include/net/irda/iriap_event.h
new file mode 100644
index 000000000000..89747f06d9eb
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/iriap_event.h
@@ -0,0 +1,85 @@
+/*********************************************************************
+ *                
+ * Filename:      iriap_event.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Aug  4 20:40:53 1997
+ * Modified at:   Sun Oct 31 22:02:54 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRIAP_FSM_H
+#define IRIAP_FSM_H
+
+/* Forward because of circular include dependecies */
+struct iriap_cb;
+
+/* IrIAP states */
+typedef enum {
+	/* Client */
+	S_DISCONNECT,
+	S_CONNECTING,
+	S_CALL,
+
+	/* S-Call */
+	S_MAKE_CALL,
+	S_CALLING,
+	S_OUTSTANDING,
+	S_REPLYING,
+	S_WAIT_FOR_CALL,
+	S_WAIT_ACTIVE,
+
+	/* Server */
+	R_DISCONNECT,
+	R_CALL,
+	
+	/* R-Connect */
+	R_WAITING,
+	R_WAIT_ACTIVE,
+	R_RECEIVING,
+	R_EXECUTE,
+	R_RETURNING,
+} IRIAP_STATE;
+
+typedef enum {
+	IAP_CALL_REQUEST,
+	IAP_CALL_REQUEST_GVBC,
+	IAP_CALL_RESPONSE,
+	IAP_RECV_F_LST,
+	IAP_LM_DISCONNECT_INDICATION,
+	IAP_LM_CONNECT_INDICATION,
+	IAP_LM_CONNECT_CONFIRM,
+} IRIAP_EVENT;
+
+void iriap_next_client_state   (struct iriap_cb *self, IRIAP_STATE state);
+void iriap_next_call_state     (struct iriap_cb *self, IRIAP_STATE state);
+void iriap_next_server_state   (struct iriap_cb *self, IRIAP_STATE state);
+void iriap_next_r_connect_state(struct iriap_cb *self, IRIAP_STATE state);
+
+
+void iriap_do_client_event(struct iriap_cb *self, IRIAP_EVENT event, 
+			   struct sk_buff *skb);
+void iriap_do_call_event  (struct iriap_cb *self, IRIAP_EVENT event, 
+			   struct sk_buff *skb);
+
+void iriap_do_server_event   (struct iriap_cb *self, IRIAP_EVENT event, 
+			      struct sk_buff *skb);
+void iriap_do_r_connect_event(struct iriap_cb *self, IRIAP_EVENT event, 
+			      struct sk_buff *skb);
+
+#endif /* IRIAP_FSM_H */
+
diff --git a/drivers/staging/irda/include/net/irda/irias_object.h b/drivers/staging/irda/include/net/irda/irias_object.h
new file mode 100644
index 000000000000..83f78081799c
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irias_object.h
@@ -0,0 +1,108 @@
+/*********************************************************************
+ *                
+ * Filename:      irias_object.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Thu Oct  1 22:49:50 1998
+ * Modified at:   Wed Dec 15 11:20:57 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#ifndef LM_IAS_OBJECT_H
+#define LM_IAS_OBJECT_H
+
+#include <net/irda/irda.h>
+#include <net/irda/irqueue.h>
+
+/* LM-IAS Attribute types */
+#define IAS_MISSING 0
+#define IAS_INTEGER 1
+#define IAS_OCT_SEQ 2
+#define IAS_STRING  3
+
+/* Object ownership of attributes (user or kernel) */
+#define IAS_KERNEL_ATTR	0
+#define IAS_USER_ATTR	1
+
+/*
+ *  LM-IAS Object
+ */
+struct ias_object {
+	irda_queue_t q;     /* Must be first! */
+	magic_t magic;
+	
+	char  *name;
+	int   id;
+	hashbin_t *attribs;
+};
+
+/*
+ *  Values used by LM-IAS attributes
+ */
+struct ias_value {
+        __u8    type;    /* Value description */
+	__u8	owner;	/* Managed from user/kernel space */
+	int     charset; /* Only used by string type */
+        int     len;
+	
+	/* Value */
+	union {
+		int integer;
+		char *string;
+		__u8 *oct_seq;
+	} t;
+};
+
+/*
+ *  Attributes used by LM-IAS objects
+ */
+struct ias_attrib {
+	irda_queue_t q; /* Must be first! */
+	int magic;
+
+        char *name;   	         /* Attribute name */
+	struct ias_value *value; /* Attribute value */
+};
+
+struct ias_object *irias_new_object(char *name, int id);
+void irias_insert_object(struct ias_object *obj);
+int  irias_delete_object(struct ias_object *obj);
+int  irias_delete_attrib(struct ias_object *obj, struct ias_attrib *attrib,
+			 int cleanobject);
+void __irias_delete_object(struct ias_object *obj);
+
+void irias_add_integer_attrib(struct ias_object *obj, char *name, int value,
+			      int user);
+void irias_add_string_attrib(struct ias_object *obj, char *name, char *value,
+			     int user);
+void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets,
+			     int len, int user);
+int irias_object_change_attribute(char *obj_name, char *attrib_name, 
+				  struct ias_value *new_value);
+struct ias_object *irias_find_object(char *name);
+struct ias_attrib *irias_find_attrib(struct ias_object *obj, char *name);
+
+struct ias_value *irias_new_string_value(char *string);
+struct ias_value *irias_new_integer_value(int integer);
+struct ias_value *irias_new_octseq_value(__u8 *octseq , int len);
+struct ias_value *irias_new_missing_value(void);
+void irias_delete_value(struct ias_value *value);
+
+extern struct ias_value irias_missing;
+extern hashbin_t *irias_objects;
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irlan_client.h b/drivers/staging/irda/include/net/irda/irlan_client.h
new file mode 100644
index 000000000000..fa8455eda280
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlan_client.h
@@ -0,0 +1,42 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_client.h
+ * Version:       0.3
+ * Description:   IrDA LAN access layer
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:37 1997
+ * Modified at:   Thu Apr 22 14:13:34 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRLAN_CLIENT_H
+#define IRLAN_CLIENT_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+#include <net/irda/irias_object.h>
+#include <net/irda/irlan_event.h>
+
+void irlan_client_discovery_indication(discinfo_t *, DISCOVERY_MODE, void *);
+void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr);
+
+void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb);
+void irlan_client_get_value_confirm(int result, __u16 obj_id, 
+				    struct ias_value *value, void *priv);
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irlan_common.h b/drivers/staging/irda/include/net/irda/irlan_common.h
new file mode 100644
index 000000000000..550c2d6ec7ff
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlan_common.h
@@ -0,0 +1,230 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_common.h
+ * Version:       0.8
+ * Description:   IrDA LAN access layer
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:37 1997
+ * Modified at:   Sun Oct 31 19:41:24 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRLAN_H
+#define IRLAN_H
+
+#include <asm/param.h>  /* for HZ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+
+#include <net/irda/irttp.h>
+
+#define IRLAN_MTU        1518
+#define IRLAN_TIMEOUT    10*HZ /* 10 seconds */
+
+/* Command packet types */
+#define CMD_GET_PROVIDER_INFO   0
+#define CMD_GET_MEDIA_CHAR      1
+#define CMD_OPEN_DATA_CHANNEL   2
+#define CMD_CLOSE_DATA_CHAN     3
+#define CMD_RECONNECT_DATA_CHAN 4
+#define CMD_FILTER_OPERATION    5
+
+/* Some responses */
+#define RSP_SUCCESS                 0
+#define RSP_INSUFFICIENT_RESOURCES  1
+#define RSP_INVALID_COMMAND_FORMAT  2
+#define RSP_COMMAND_NOT_SUPPORTED   3
+#define RSP_PARAM_NOT_SUPPORTED     4
+#define RSP_VALUE_NOT_SUPPORTED     5
+#define RSP_NOT_OPEN                6
+#define RSP_AUTHENTICATION_REQUIRED 7
+#define RSP_INVALID_PASSWORD        8
+#define RSP_PROTOCOL_ERROR          9
+#define RSP_ASYNCHRONOUS_ERROR    255
+
+/* Media types */
+#define MEDIA_802_3 1
+#define MEDIA_802_5 2
+
+/* Filter parameters */
+#define DATA_CHAN   1
+#define FILTER_TYPE 2
+#define FILTER_MODE 3
+
+/* Filter types */
+#define IRLAN_DIRECTED   0x01
+#define IRLAN_FUNCTIONAL 0x02
+#define IRLAN_GROUP      0x04
+#define IRLAN_MAC_FRAME  0x08
+#define IRLAN_MULTICAST  0x10
+#define IRLAN_BROADCAST  0x20
+#define IRLAN_IPX_SOCKET 0x40
+
+/* Filter modes */
+#define ALL     1
+#define FILTER  2
+#define NONE    3
+
+/* Filter operations */
+#define GET     1
+#define CLEAR   2
+#define ADD     3
+#define REMOVE  4
+#define DYNAMIC 5
+
+/* Access types */
+#define ACCESS_DIRECT  1
+#define ACCESS_PEER    2
+#define ACCESS_HOSTED  3
+
+#define IRLAN_BYTE   0
+#define IRLAN_SHORT  1
+#define IRLAN_ARRAY  2
+
+/* IrLAN sits on top if IrTTP */
+#define IRLAN_MAX_HEADER (TTP_HEADER+LMP_HEADER)
+/* 1 byte for the command code and 1 byte for the parameter count */
+#define IRLAN_CMD_HEADER 2
+
+#define IRLAN_STRING_PARAMETER_LEN(name, value) (1 + strlen((name)) + 2 \
+						+ strlen ((value)))
+#define IRLAN_BYTE_PARAMETER_LEN(name)          (1 + strlen((name)) + 2 + 1)
+#define IRLAN_SHORT_PARAMETER_LEN(name)         (1 + strlen((name)) + 2 + 2)
+
+/*
+ *  IrLAN client
+ */
+struct irlan_client_cb {
+	int state;
+
+	int open_retries;
+
+	struct tsap_cb *tsap_ctrl;
+	__u32 max_sdu_size;
+	__u8  max_header_size;
+	
+	int access_type;         /* Access type of provider */
+	__u8 reconnect_key[255];
+	__u8 key_len;
+	
+	__u16 recv_arb_val;
+	__u16 max_frame;
+	int filter_type;
+
+	int unicast_open;
+	int broadcast_open;
+
+	int tx_busy;
+	struct sk_buff_head txq; /* Transmit control queue */
+
+	struct iriap_cb *iriap;
+
+	struct timer_list kick_timer;
+};
+
+/*
+ * IrLAN provider
+ */
+struct irlan_provider_cb {
+	int state;
+	
+	struct tsap_cb *tsap_ctrl;
+	__u32 max_sdu_size;
+	__u8  max_header_size;
+
+	/*
+	 *  Store some values here which are used by the provider to parse
+	 *  the filter operations
+	 */
+	int data_chan;
+	int filter_type;
+	int filter_mode;
+	int filter_operation;
+	int filter_entry;
+	int access_type;     /* Access type */
+	__u16 send_arb_val;
+
+	__u8 mac_address[ETH_ALEN]; /* Generated MAC address for peer device */
+};
+
+/*
+ *  IrLAN control block
+ */
+struct irlan_cb {
+	int    magic;
+	struct list_head  dev_list;
+	struct net_device *dev;        /* Ethernet device structure*/
+
+	__u32 saddr;               /* Source device address */
+	__u32 daddr;               /* Destination device address */
+	int disconnect_reason;     /* Why we got disconnected */
+	
+	int media;                 /* Media type */
+	__u8 version[2];           /* IrLAN version */
+	
+	struct tsap_cb *tsap_data; /* Data TSAP */
+
+	int  use_udata;            /* Use Unit Data transfers */
+
+	__u8 stsap_sel_data;       /* Source data TSAP selector */
+	__u8 dtsap_sel_data;       /* Destination data TSAP selector */
+	__u8 dtsap_sel_ctrl;       /* Destination ctrl TSAP selector */
+
+	struct irlan_client_cb   client;   /* Client specific fields */
+	struct irlan_provider_cb provider; /* Provider specific fields */
+
+	__u32 max_sdu_size;
+	__u8  max_header_size;
+	
+	wait_queue_head_t open_wait;
+	struct timer_list watchdog_timer;
+};
+
+void irlan_close(struct irlan_cb *self);
+void irlan_close_tsaps(struct irlan_cb *self);
+
+int  irlan_register_netdev(struct irlan_cb *self);
+void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel);
+void irlan_start_watchdog_timer(struct irlan_cb *self, int timeout);
+
+void irlan_open_data_tsap(struct irlan_cb *self);
+
+int irlan_run_ctrl_tx_queue(struct irlan_cb *self);
+
+struct irlan_cb *irlan_get_any(void);
+void irlan_get_provider_info(struct irlan_cb *self);
+void irlan_get_media_char(struct irlan_cb *self);
+void irlan_open_data_channel(struct irlan_cb *self);
+void irlan_close_data_channel(struct irlan_cb *self);
+void irlan_set_multicast_filter(struct irlan_cb *self, int status);
+void irlan_set_broadcast_filter(struct irlan_cb *self, int status);
+
+int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value);
+int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value);
+int irlan_insert_string_param(struct sk_buff *skb, char *param, char *value);
+int irlan_insert_array_param(struct sk_buff *skb, char *name, __u8 *value, 
+			     __u16 value_len);
+
+int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len);
+
+#endif
+
+
diff --git a/drivers/staging/irda/include/net/irda/irlan_eth.h b/drivers/staging/irda/include/net/irda/irlan_eth.h
new file mode 100644
index 000000000000..de5c81691f33
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlan_eth.h
@@ -0,0 +1,32 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_eth.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Thu Oct 15 08:36:58 1998
+ * Modified at:   Fri May 14 23:29:00 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#ifndef IRLAN_ETH_H
+#define IRLAN_ETH_H
+
+struct net_device *alloc_irlandev(const char *name);
+int  irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb);
+
+void irlan_eth_flow_indication( void *instance, void *sap, LOCAL_FLOW flow);
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irlan_event.h b/drivers/staging/irda/include/net/irda/irlan_event.h
new file mode 100644
index 000000000000..018b5a77e610
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlan_event.h
@@ -0,0 +1,81 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_event.h
+ * Version:       
+ * Description:   LAN access
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:37 1997
+ * Modified at:   Tue Feb  2 09:45:17 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRLAN_EVENT_H
+#define IRLAN_EVENT_H
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include <net/irda/irlan_common.h>
+
+typedef enum {
+	IRLAN_IDLE,
+	IRLAN_QUERY,
+	IRLAN_CONN, 
+	IRLAN_INFO,
+	IRLAN_MEDIA,
+	IRLAN_OPEN,
+	IRLAN_WAIT,
+	IRLAN_ARB, 
+	IRLAN_DATA,
+	IRLAN_CLOSE,
+	IRLAN_SYNC
+} IRLAN_STATE;
+
+typedef enum {
+	IRLAN_DISCOVERY_INDICATION,
+	IRLAN_IAS_PROVIDER_AVAIL,
+	IRLAN_IAS_PROVIDER_NOT_AVAIL,
+	IRLAN_LAP_DISCONNECT,
+	IRLAN_LMP_DISCONNECT,
+	IRLAN_CONNECT_COMPLETE,
+	IRLAN_DATA_INDICATION,
+	IRLAN_DATA_CONNECT_INDICATION,
+	IRLAN_RETRY_CONNECT,
+
+	IRLAN_CONNECT_INDICATION,
+	IRLAN_GET_INFO_CMD,
+	IRLAN_GET_MEDIA_CMD,
+	IRLAN_OPEN_DATA_CMD,
+	IRLAN_FILTER_CONFIG_CMD,
+
+	IRLAN_CHECK_CON_ARB,
+	IRLAN_PROVIDER_SIGNAL,
+
+	IRLAN_WATCHDOG_TIMEOUT,
+} IRLAN_EVENT;
+
+extern const char * const irlan_state[];
+
+void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, 
+			   struct sk_buff *skb);
+
+void irlan_do_provider_event(struct irlan_cb *self, IRLAN_EVENT event, 
+			     struct sk_buff *skb);
+
+void irlan_next_client_state(struct irlan_cb *self, IRLAN_STATE state);
+void irlan_next_provider_state(struct irlan_cb *self, IRLAN_STATE state);
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irlan_filter.h b/drivers/staging/irda/include/net/irda/irlan_filter.h
new file mode 100644
index 000000000000..a5a2539485bd
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlan_filter.h
@@ -0,0 +1,35 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_filter.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Fri Jan 29 15:24:08 1999
+ * Modified at:   Sun Feb  7 23:35:31 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998 Dag Brattli, All Rights Reserved.
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#ifndef IRLAN_FILTER_H
+#define IRLAN_FILTER_H
+
+void irlan_check_command_param(struct irlan_cb *self, char *param, 
+			       char *value);
+void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb);
+#ifdef CONFIG_PROC_FS
+void irlan_print_filter(struct seq_file *seq, int filter_type);
+#endif
+
+#endif /* IRLAN_FILTER_H */
diff --git a/drivers/staging/irda/include/net/irda/irlan_provider.h b/drivers/staging/irda/include/net/irda/irlan_provider.h
new file mode 100644
index 000000000000..92f3b0e1029b
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlan_provider.h
@@ -0,0 +1,52 @@
+/*********************************************************************
+ *                
+ * Filename:      irlan_provider.h
+ * Version:       0.1
+ * Description:   IrDA LAN access layer
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:37 1997
+ * Modified at:   Sun May  9 12:26:11 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRLAN_SERVER_H
+#define IRLAN_SERVER_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+#include <net/irda/irlan_common.h>
+
+void irlan_provider_ctrl_disconnect_indication(void *instance, void *sap, 
+					       LM_REASON reason, 
+					       struct sk_buff *skb);
+
+
+void irlan_provider_connect_response(struct irlan_cb *, struct tsap_cb *);
+
+int irlan_parse_open_data_cmd(struct irlan_cb *self, struct sk_buff *skb);
+int irlan_provider_parse_command(struct irlan_cb *self, int cmd,
+				 struct sk_buff *skb);
+
+void irlan_provider_send_reply(struct irlan_cb *self, int command, 
+			       int ret_code);
+int irlan_provider_open_ctrl_tsap(struct irlan_cb *self);
+
+#endif
+
+
diff --git a/drivers/staging/irda/include/net/irda/irlap.h b/drivers/staging/irda/include/net/irda/irlap.h
new file mode 100644
index 000000000000..6f23e820618c
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlap.h
@@ -0,0 +1,311 @@
+/*********************************************************************
+ *                
+ * Filename:      irlap.h
+ * Version:       0.8
+ * Description:   An IrDA LAP driver for Linux
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Aug  4 20:40:53 1997
+ * Modified at:   Fri Dec 10 13:21:17 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRLAP_H
+#define IRLAP_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/timer.h>
+
+#include <net/irda/irqueue.h>		/* irda_queue_t */
+#include <net/irda/qos.h>		/* struct qos_info */
+#include <net/irda/discovery.h>		/* discovery_t */
+#include <net/irda/irlap_event.h>	/* IRLAP_STATE, ... */
+#include <net/irda/irmod.h>		/* struct notify_t */
+
+#define CONFIG_IRDA_DYNAMIC_WINDOW 1
+
+#define LAP_RELIABLE   1
+#define LAP_UNRELIABLE 0
+
+#define LAP_ADDR_HEADER 1  /* IrLAP Address Header */
+#define LAP_CTRL_HEADER 1  /* IrLAP Control Header */
+
+/* May be different when we get VFIR */
+#define LAP_MAX_HEADER (LAP_ADDR_HEADER + LAP_CTRL_HEADER)
+
+/* Each IrDA device gets a random 32 bits IRLAP device address */
+#define LAP_ALEN 4
+
+#define BROADCAST  0xffffffff /* Broadcast device address */
+#define CBROADCAST 0xfe       /* Connection broadcast address */
+#define XID_FORMAT 0x01       /* Discovery XID format */
+
+/* Nobody seems to use this constant. */
+#define LAP_WINDOW_SIZE 8
+/* We keep the LAP queue very small to minimise the amount of buffering.
+ * this improve latency and reduce resource consumption.
+ * This work only because we have synchronous refilling of IrLAP through
+ * the flow control mechanism (via scheduler and IrTTP).
+ * 2 buffers is the minimum we can work with, one that we send while polling
+ * IrTTP, and another to know that we should not send the pf bit.
+ * Jean II */
+#define LAP_HIGH_THRESHOLD     2
+/* Some rare non TTP clients don't implement flow control, and
+ * so don't comply with the above limit (and neither with this one).
+ * For IAP and management, it doesn't matter, because they never transmit much.
+ *.For IrLPT, this should be fixed.
+ * - Jean II */
+#define LAP_MAX_QUEUE 10
+/* Please note that all IrDA management frames (LMP/TTP conn req/disc and
+ * IAS queries) fall in the second category and are sent to LAP even if TTP
+ * is stopped. This means that those frames will wait only a maximum of
+ * two (2) data frames before beeing sent on the "wire", which speed up
+ * new socket setup when the link is saturated.
+ * Same story for two sockets competing for the medium : if one saturates
+ * the LAP, when the other want to transmit it only has to wait for
+ * maximum three (3) packets (2 + one scheduling), which improve performance
+ * of delay sensitive applications.
+ * Jean II */
+
+#define NR_EXPECTED     1
+#define NR_UNEXPECTED   0
+#define NR_INVALID     -1
+
+#define NS_EXPECTED     1
+#define NS_UNEXPECTED   0
+#define NS_INVALID     -1
+
+/*
+ *  Meta information passed within the IrLAP state machine
+ */
+struct irlap_info {
+	__u8 caddr;   /* Connection address */
+	__u8 control; /* Frame type */
+        __u8 cmd;
+
+	__u32 saddr;
+	__u32 daddr;
+	
+	int pf;        /* Poll/final bit set */
+
+	__u8  nr;      /* Sequence number of next frame expected */
+	__u8  ns;      /* Sequence number of frame sent */
+
+	int  S;        /* Number of slots */
+	int  slot;     /* Random chosen slot */
+	int  s;        /* Current slot */
+
+	discovery_t *discovery; /* Discovery information */
+};
+
+/* Main structure of IrLAP */
+struct irlap_cb {
+	irda_queue_t q;     /* Must be first */
+	magic_t magic;
+
+	/* Device we are attached to */
+	struct net_device  *netdev;
+	char		hw_name[2*IFNAMSIZ + 1];
+
+	/* Connection state */
+	volatile IRLAP_STATE state;       /* Current state */
+
+	/* Timers used by IrLAP */
+	struct timer_list query_timer;
+	struct timer_list slot_timer;
+	struct timer_list discovery_timer;
+	struct timer_list final_timer;
+	struct timer_list poll_timer;
+	struct timer_list wd_timer;
+	struct timer_list backoff_timer;
+
+	/* Media busy stuff */
+	struct timer_list media_busy_timer;
+	int media_busy;
+
+	/* Timeouts which will be different with different turn time */
+	int slot_timeout;
+	int poll_timeout;
+	int final_timeout;
+	int wd_timeout;
+
+	struct sk_buff_head txq;  /* Frames to be transmitted */
+	struct sk_buff_head txq_ultra;
+
+ 	__u8    caddr;        /* Connection address */
+	__u32   saddr;        /* Source device address */
+	__u32   daddr;        /* Destination device address */
+
+	int     retry_count;  /* Times tried to establish connection */
+	int     add_wait;     /* True if we are waiting for frame */
+
+	__u8    connect_pending;
+	__u8    disconnect_pending;
+
+	/*  To send a faster RR if tx queue empty */
+#ifdef CONFIG_IRDA_FAST_RR
+	int     fast_RR_timeout;
+	int     fast_RR;      
+#endif /* CONFIG_IRDA_FAST_RR */
+	
+	int N1; /* N1 * F-timer = Negitiated link disconnect warning threshold */
+	int N2; /* N2 * F-timer = Negitiated link disconnect time */
+	int N3; /* Connection retry count */
+
+	int     local_busy;
+	int     remote_busy;
+	int     xmitflag;
+
+	__u8    vs;            /* Next frame to be sent */
+	__u8    vr;            /* Next frame to be received */
+	__u8    va;            /* Last frame acked */
+ 	int     window;        /* Nr of I-frames allowed to send */
+	int     window_size;   /* Current negotiated window size */
+
+#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
+	__u32   line_capacity; /* Number of bytes allowed to send */
+	__u32   bytes_left;    /* Number of bytes still allowed to transmit */
+#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
+
+	struct sk_buff_head wx_list;
+
+	__u8    ack_required;
+	
+	/* XID parameters */
+ 	__u8    S;           /* Number of slots */
+	__u8    slot;        /* Random chosen slot */
+ 	__u8    s;           /* Current slot */
+	int     frame_sent;  /* Have we sent reply? */
+
+	hashbin_t   *discovery_log;
+ 	discovery_t *discovery_cmd;
+
+	__u32 speed;		/* Link speed */
+
+	struct qos_info  qos_tx;   /* QoS requested by peer */
+	struct qos_info  qos_rx;   /* QoS requested by self */
+	struct qos_info *qos_dev;  /* QoS supported by device */
+
+	notify_t notify; /* Callbacks to IrLMP */
+
+	int    mtt_required;  /* Minimum turnaround time required */
+	int    xbofs_delay;   /* Nr of XBOF's used to MTT */
+	int    bofs_count;    /* Negotiated extra BOFs */
+	int    next_bofs;     /* Negotiated extra BOFs after next frame */
+
+	int    mode;     /* IrLAP mode (primary, secondary or monitor) */
+};
+
+/* 
+ *  Function prototypes 
+ */
+int irlap_init(void);
+void irlap_cleanup(void);
+
+struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
+			    const char *hw_name);
+void irlap_close(struct irlap_cb *self);
+
+void irlap_connect_request(struct irlap_cb *self, __u32 daddr, 
+			   struct qos_info *qos, int sniff);
+void irlap_connect_response(struct irlap_cb *self, struct sk_buff *skb);
+void irlap_connect_indication(struct irlap_cb *self, struct sk_buff *skb);
+void irlap_connect_confirm(struct irlap_cb *, struct sk_buff *skb);
+
+void irlap_data_indication(struct irlap_cb *, struct sk_buff *, int unreliable);
+void irlap_data_request(struct irlap_cb *, struct sk_buff *, int unreliable);
+
+#ifdef CONFIG_IRDA_ULTRA
+void irlap_unitdata_request(struct irlap_cb *, struct sk_buff *);
+void irlap_unitdata_indication(struct irlap_cb *, struct sk_buff *);
+#endif /* CONFIG_IRDA_ULTRA */
+
+void irlap_disconnect_request(struct irlap_cb *);
+void irlap_disconnect_indication(struct irlap_cb *, LAP_REASON reason);
+
+void irlap_status_indication(struct irlap_cb *, int quality_of_link);
+
+void irlap_test_request(__u8 *info, int len);
+
+void irlap_discovery_request(struct irlap_cb *, discovery_t *discovery);
+void irlap_discovery_confirm(struct irlap_cb *, hashbin_t *discovery_log);
+void irlap_discovery_indication(struct irlap_cb *, discovery_t *discovery);
+
+void irlap_reset_indication(struct irlap_cb *self);
+void irlap_reset_confirm(void);
+
+void irlap_update_nr_received(struct irlap_cb *, int nr);
+int irlap_validate_nr_received(struct irlap_cb *, int nr);
+int irlap_validate_ns_received(struct irlap_cb *, int ns);
+
+int  irlap_generate_rand_time_slot(int S, int s);
+void irlap_initiate_connection_state(struct irlap_cb *);
+void irlap_flush_all_queues(struct irlap_cb *);
+void irlap_wait_min_turn_around(struct irlap_cb *, struct qos_info *);
+
+void irlap_apply_default_connection_parameters(struct irlap_cb *self);
+void irlap_apply_connection_parameters(struct irlap_cb *self, int now);
+
+#define IRLAP_GET_HEADER_SIZE(self) (LAP_MAX_HEADER)
+#define IRLAP_GET_TX_QUEUE_LEN(self) skb_queue_len(&self->txq)
+
+/* Return TRUE if the node is in primary mode (i.e. master)
+ * - Jean II */
+static inline int irlap_is_primary(struct irlap_cb *self)
+{
+	int ret;
+	switch(self->state) {
+	case LAP_XMIT_P:
+	case LAP_NRM_P:
+		ret = 1;
+		break;
+	case LAP_XMIT_S:
+	case LAP_NRM_S:
+		ret = 0;
+		break;
+	default:
+		ret = -1;
+	}
+	return ret;
+}
+
+/* Clear a pending IrLAP disconnect. - Jean II */
+static inline void irlap_clear_disconnect(struct irlap_cb *self)
+{
+	self->disconnect_pending = FALSE;
+}
+
+/*
+ * Function irlap_next_state (self, state)
+ *
+ *    Switches state and provides debug information
+ *
+ */
+static inline void irlap_next_state(struct irlap_cb *self, IRLAP_STATE state)
+{
+	/*
+	if (!self || self->magic != LAP_MAGIC)
+		return;
+
+		pr_debug("next LAP state = %s\n", irlap_state[state]);
+	*/
+	self->state = state;
+}
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irlap_event.h b/drivers/staging/irda/include/net/irda/irlap_event.h
new file mode 100644
index 000000000000..e4325fee1267
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlap_event.h
@@ -0,0 +1,129 @@
+/*********************************************************************
+ *                
+ *                
+ * Filename:      irlap_event.h
+ * Version:       0.1
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sat Aug 16 00:59:29 1997
+ * Modified at:   Tue Dec 21 11:20:30 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRLAP_EVENT_H
+#define IRLAP_EVENT_H
+
+#include <net/irda/irda.h>
+
+/* A few forward declarations (to make compiler happy) */
+struct irlap_cb;
+struct irlap_info;
+
+/* IrLAP States */
+typedef enum {
+	LAP_NDM,         /* Normal disconnected mode */
+	LAP_QUERY,
+	LAP_REPLY,
+	LAP_CONN,        /* Connect indication */
+	LAP_SETUP,       /* Setting up connection */
+	LAP_OFFLINE,     /* A really boring state */
+	LAP_XMIT_P,
+	LAP_PCLOSE,
+	LAP_NRM_P,       /* Normal response mode as primary */
+	LAP_RESET_WAIT,
+	LAP_RESET,
+	LAP_NRM_S,       /* Normal response mode as secondary */
+	LAP_XMIT_S,
+	LAP_SCLOSE,
+	LAP_RESET_CHECK,
+} IRLAP_STATE;
+
+/* IrLAP Events */
+typedef enum {
+	/* Services events */
+	DISCOVERY_REQUEST,
+	CONNECT_REQUEST,
+	CONNECT_RESPONSE,
+	DISCONNECT_REQUEST,
+	DATA_REQUEST,
+	RESET_REQUEST,
+	RESET_RESPONSE,
+
+	/* Send events */
+	SEND_I_CMD,
+	SEND_UI_FRAME,
+
+	/* Receive events */
+	RECV_DISCOVERY_XID_CMD,
+	RECV_DISCOVERY_XID_RSP,
+	RECV_SNRM_CMD,
+	RECV_TEST_CMD,
+	RECV_TEST_RSP,
+	RECV_UA_RSP,
+	RECV_DM_RSP,
+	RECV_RD_RSP,
+	RECV_I_CMD,
+	RECV_I_RSP,
+	RECV_UI_FRAME,
+	RECV_FRMR_RSP,
+	RECV_RR_CMD,
+	RECV_RR_RSP,
+	RECV_RNR_CMD,
+	RECV_RNR_RSP,
+	RECV_REJ_CMD,
+	RECV_REJ_RSP,
+	RECV_SREJ_CMD,
+	RECV_SREJ_RSP,
+	RECV_DISC_CMD,
+
+	/* Timer events */
+	SLOT_TIMER_EXPIRED,
+	QUERY_TIMER_EXPIRED,
+	FINAL_TIMER_EXPIRED,
+	POLL_TIMER_EXPIRED,
+	DISCOVERY_TIMER_EXPIRED,
+	WD_TIMER_EXPIRED,
+	BACKOFF_TIMER_EXPIRED,
+	MEDIA_BUSY_TIMER_EXPIRED,
+} IRLAP_EVENT;
+
+/*
+ * Disconnect reason code
+ */
+typedef enum { /* FIXME check the two first reason codes */
+	LAP_DISC_INDICATION=1, /* Received a disconnect request from peer */
+	LAP_NO_RESPONSE,       /* To many retransmits without response */
+	LAP_RESET_INDICATION,  /* To many retransmits, or invalid nr/ns */
+	LAP_FOUND_NONE,        /* No devices were discovered */
+	LAP_MEDIA_BUSY,
+	LAP_PRIMARY_CONFLICT,
+} LAP_REASON;
+
+extern const char *const irlap_state[];
+
+void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, 
+		    struct sk_buff *skb, struct irlap_info *info);
+void irlap_print_event(IRLAP_EVENT event);
+
+int irlap_qos_negotiate(struct irlap_cb *self, struct sk_buff *skb);
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irlap_frame.h b/drivers/staging/irda/include/net/irda/irlap_frame.h
new file mode 100644
index 000000000000..cbc12a926e5f
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlap_frame.h
@@ -0,0 +1,167 @@
+/*********************************************************************
+ *                
+ * Filename:      irlap_frame.h
+ * Version:       0.9
+ * Description:   IrLAP frame declarations
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Aug 19 10:27:26 1997
+ * Modified at:   Sat Dec 25 21:07:26 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997-1999 Dag Brattli <dagb@cs.uit.no>,
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRLAP_FRAME_H
+#define IRLAP_FRAME_H
+
+#include <linux/skbuff.h>
+
+#include <net/irda/irda.h>
+
+/* A few forward declarations (to make compiler happy) */
+struct irlap_cb;
+struct discovery_t;
+
+/* Frame types and templates */
+#define INVALID   0xff
+
+/* Unnumbered (U) commands */
+#define SNRM_CMD  0x83 /* Set Normal Response Mode */
+#define DISC_CMD  0x43 /* Disconnect */
+#define XID_CMD   0x2f /* Exchange Station Identification */
+#define TEST_CMD  0xe3 /* Test */
+
+/* Unnumbered responses */
+#define RNRM_RSP  0x83 /* Request Normal Response Mode */
+#define UA_RSP    0x63 /* Unnumbered Acknowledgement */
+#define FRMR_RSP  0x87 /* Frame Reject */
+#define DM_RSP    0x0f /* Disconnect Mode */
+#define RD_RSP    0x43 /* Request Disconnection */
+#define XID_RSP   0xaf /* Exchange Station Identification */
+#define TEST_RSP  0xe3 /* Test frame */
+
+/* Supervisory (S) */
+#define RR        0x01 /* Receive Ready */
+#define REJ       0x09 /* Reject */
+#define RNR       0x05 /* Receive Not Ready */
+#define SREJ      0x0d /* Selective Reject */
+
+/* Information (I) */
+#define I_FRAME   0x00 /* Information Format */
+#define UI_FRAME  0x03 /* Unnumbered Information */
+
+#define CMD_FRAME 0x01
+#define RSP_FRAME 0x00
+
+#define PF_BIT    0x10 /* Poll/final bit */
+
+/* Some IrLAP field lengths */
+/*
+ * Only baud rate triplet is 4 bytes (PV can be 2 bytes).
+ * All others params (7) are 3 bytes, so that's 7*3 + 1*4 bytes.
+ */
+#define IRLAP_NEGOCIATION_PARAMS_LEN 25
+#define IRLAP_DISCOVERY_INFO_LEN     32
+
+struct disc_frame {
+	__u8 caddr;          /* Connection address */
+	__u8 control;
+} __packed;
+
+struct xid_frame {
+	__u8  caddr; /* Connection address */
+	__u8  control;
+	__u8  ident; /* Should always be XID_FORMAT */ 
+	__le32 saddr; /* Source device address */
+	__le32 daddr; /* Destination device address */
+	__u8  flags; /* Discovery flags */
+	__u8  slotnr;
+	__u8  version;
+} __packed;
+
+struct test_frame {
+	__u8 caddr;          /* Connection address */
+	__u8 control;
+	__le32 saddr;         /* Source device address */
+	__le32 daddr;         /* Destination device address */
+} __packed;
+
+struct ua_frame {
+	__u8 caddr;
+	__u8 control;
+	__le32 saddr; /* Source device address */
+	__le32 daddr; /* Dest device address */
+} __packed;
+
+struct dm_frame {
+	__u8 caddr;          /* Connection address */
+	__u8 control;
+} __packed;
+
+struct rd_frame {
+	__u8 caddr;          /* Connection address */
+	__u8 control;
+} __packed;
+
+struct rr_frame {
+	__u8 caddr;          /* Connection address */
+	__u8 control;
+} __packed;
+
+struct i_frame {
+	__u8 caddr;
+	__u8 control;
+} __packed;
+
+struct snrm_frame {
+	__u8  caddr;
+	__u8  control;
+	__le32 saddr;
+	__le32 daddr;
+	__u8  ncaddr;
+} __packed;
+
+void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb);
+void irlap_send_discovery_xid_frame(struct irlap_cb *, int S, __u8 s, 
+				    __u8 command,
+				    struct discovery_t *discovery);
+void irlap_send_snrm_frame(struct irlap_cb *, struct qos_info *);
+void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, 
+			   struct sk_buff *cmd);
+void irlap_send_ua_response_frame(struct irlap_cb *, struct qos_info *);
+void irlap_send_dm_frame(struct irlap_cb *self);
+void irlap_send_rd_frame(struct irlap_cb *self);
+void irlap_send_disc_frame(struct irlap_cb *self);
+void irlap_send_rr_frame(struct irlap_cb *self, int command);
+
+void irlap_send_data_primary(struct irlap_cb *, struct sk_buff *);
+void irlap_send_data_primary_poll(struct irlap_cb *, struct sk_buff *);
+void irlap_send_data_secondary(struct irlap_cb *, struct sk_buff *);
+void irlap_send_data_secondary_final(struct irlap_cb *, struct sk_buff *);
+void irlap_resend_rejected_frames(struct irlap_cb *, int command);
+void irlap_resend_rejected_frame(struct irlap_cb *self, int command);
+
+void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb,
+			 __u8 caddr, int command);
+
+int irlap_insert_qos_negotiation_params(struct irlap_cb *self,
+					struct sk_buff *skb);
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irlmp.h b/drivers/staging/irda/include/net/irda/irlmp.h
new file mode 100644
index 000000000000..f132924cc9da
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlmp.h
@@ -0,0 +1,295 @@
+/*********************************************************************
+ *                
+ * Filename:      irlmp.h
+ * Version:       0.9
+ * Description:   IrDA Link Management Protocol (LMP) layer
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 17 20:54:32 1997
+ * Modified at:   Fri Dec 10 13:23:01 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRLMP_H
+#define IRLMP_H
+
+#include <asm/param.h>  /* for HZ */
+
+#include <linux/types.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/qos.h>
+#include <net/irda/irlap.h>		/* LAP_MAX_HEADER, ... */
+#include <net/irda/irlmp_event.h>
+#include <net/irda/irqueue.h>
+#include <net/irda/discovery.h>
+
+/* LSAP-SEL's */
+#define LSAP_MASK     0x7f
+#define LSAP_IAS      0x00
+#define LSAP_ANY      0xff
+#define LSAP_MAX      0x6f /* 0x70-0x7f are reserved */
+#define LSAP_CONNLESS 0x70 /* Connectionless LSAP, mostly used for Ultra */
+
+#define DEV_ADDR_ANY  0xffffffff
+
+#define LMP_HEADER          2    /* Dest LSAP + Source LSAP */
+#define LMP_CONTROL_HEADER  4    /* LMP_HEADER + opcode + parameter */
+#define LMP_PID_HEADER      1    /* Used by Ultra */
+#define LMP_MAX_HEADER      (LMP_CONTROL_HEADER+LAP_MAX_HEADER)
+
+#define LM_MAX_CONNECTIONS  10
+
+#define LM_IDLE_TIMEOUT     2*HZ /* 2 seconds for now */
+
+typedef enum {
+	S_PNP = 0,
+	S_PDA,
+	S_COMPUTER,
+	S_PRINTER,
+	S_MODEM,
+	S_FAX,
+	S_LAN,
+	S_TELEPHONY,
+	S_COMM,
+	S_OBEX,
+	S_ANY,
+	S_END,
+} SERVICE;
+
+/* For selective discovery */
+typedef void (*DISCOVERY_CALLBACK1) (discinfo_t *, DISCOVERY_MODE, void *);
+/* For expiry (the same) */
+typedef void (*DISCOVERY_CALLBACK2) (discinfo_t *, DISCOVERY_MODE, void *);
+
+typedef struct {
+	irda_queue_t queue; /* Must be first */
+
+	__u16_host_order hints; /* Hint bits */
+} irlmp_service_t;
+
+typedef struct {
+	irda_queue_t queue; /* Must be first */
+
+	__u16_host_order hint_mask;
+
+	DISCOVERY_CALLBACK1 disco_callback;	/* Selective discovery */
+	DISCOVERY_CALLBACK2 expir_callback;	/* Selective expiration */
+	void *priv;                /* Used to identify client */
+} irlmp_client_t;
+
+/*
+ *  Information about each logical LSAP connection
+ */
+struct lsap_cb {
+	irda_queue_t queue;      /* Must be first */
+	magic_t magic;
+
+	unsigned long connected;	/* set_bit used on this */
+	int  persistent;
+
+	__u8 slsap_sel;   /* Source (this) LSAP address */
+	__u8 dlsap_sel;   /* Destination LSAP address (if connected) */
+#ifdef CONFIG_IRDA_ULTRA
+	__u8 pid;         /* Used by connectionless LSAP */
+#endif /* CONFIG_IRDA_ULTRA */
+	struct sk_buff *conn_skb; /* Store skb here while connecting */
+
+	struct timer_list watchdog_timer;
+
+	LSAP_STATE      lsap_state;  /* Connection state */
+	notify_t        notify;      /* Indication/Confirm entry points */
+	struct qos_info qos;         /* QoS for this connection */
+
+	struct lap_cb *lap; /* Pointer to LAP connection structure */
+};
+
+/*
+ *  Used for caching the last slsap->dlsap->handle mapping
+ *
+ * We don't need to keep/match the remote address in the cache because
+ * we are associated with a specific LAP (which implies it).
+ * Jean II
+ */
+typedef struct {
+	int valid;
+
+	__u8 slsap_sel;
+	__u8 dlsap_sel;
+	struct lsap_cb *lsap;
+} CACHE_ENTRY;
+
+/*
+ *  Information about each registered IrLAP layer
+ */
+struct lap_cb {
+	irda_queue_t queue; /* Must be first */
+	magic_t magic;
+
+	int reason;    /* LAP disconnect reason */
+
+	IRLMP_STATE lap_state;
+
+	struct irlap_cb *irlap;   /* Instance of IrLAP layer */
+	hashbin_t *lsaps;         /* LSAP associated with this link */
+	struct lsap_cb *flow_next;	/* Next lsap to be polled for Tx */
+
+	__u8  caddr;  /* Connection address */
+ 	__u32 saddr;  /* Source device address */
+ 	__u32 daddr;  /* Destination device address */
+	
+	struct qos_info *qos;  /* LAP QoS for this session */
+	struct timer_list idle_timer;
+	
+#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
+	/* The lsap cache was moved from struct irlmp_cb to here because
+	 * it must be associated with the specific LAP. Also, this
+	 * improves performance. - Jean II */
+	CACHE_ENTRY cache;  /* Caching last slsap->dlsap->handle mapping */
+#endif
+};
+
+/*
+ *  Main structure for IrLMP
+ */
+struct irlmp_cb {
+	magic_t magic;
+
+	__u8 conflict_flag;
+	
+	discovery_t discovery_cmd; /* Discovery command to use by IrLAP */
+	discovery_t discovery_rsp; /* Discovery response to use by IrLAP */
+
+	/* Last lsap picked automatically by irlmp_find_free_slsap() */
+	int	last_lsap_sel;
+
+	struct timer_list discovery_timer;
+
+ 	hashbin_t *links;         /* IrLAP connection table */
+	hashbin_t *unconnected_lsaps;
+ 	hashbin_t *clients;
+	hashbin_t *services;
+
+	hashbin_t *cachelog;	/* Current discovery log */
+
+	int running;
+
+	__u16_host_order hints; /* Hint bits */
+};
+
+/* Prototype declarations */
+int  irlmp_init(void);
+void irlmp_cleanup(void);
+struct lsap_cb *irlmp_open_lsap(__u8 slsap, notify_t *notify, __u8 pid);
+void irlmp_close_lsap( struct lsap_cb *self);
+
+__u16 irlmp_service_to_hint(int service);
+void *irlmp_register_service(__u16 hints);
+int irlmp_unregister_service(void *handle);
+void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb,
+			    DISCOVERY_CALLBACK2 expir_clb, void *priv);
+int irlmp_unregister_client(void *handle);
+int irlmp_update_client(void *handle, __u16 hint_mask, 
+			DISCOVERY_CALLBACK1 disco_clb,
+			DISCOVERY_CALLBACK2 expir_clb, void *priv);
+
+void irlmp_register_link(struct irlap_cb *, __u32 saddr, notify_t *);
+void irlmp_unregister_link(__u32 saddr);
+
+int  irlmp_connect_request(struct lsap_cb *, __u8 dlsap_sel, 
+			   __u32 saddr, __u32 daddr,
+			   struct qos_info *, struct sk_buff *);
+void irlmp_connect_indication(struct lsap_cb *self, struct sk_buff *skb);
+int  irlmp_connect_response(struct lsap_cb *, struct sk_buff *);
+void irlmp_connect_confirm(struct lsap_cb *, struct sk_buff *);
+struct lsap_cb *irlmp_dup(struct lsap_cb *self, void *instance);
+
+void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, 
+				 struct sk_buff *userdata);
+int  irlmp_disconnect_request(struct lsap_cb *, struct sk_buff *userdata);
+
+void irlmp_discovery_confirm(hashbin_t *discovery_log, DISCOVERY_MODE mode);
+void irlmp_discovery_request(int nslots);
+discinfo_t *irlmp_get_discoveries(int *pn, __u16 mask, int nslots);
+void irlmp_do_expiry(void);
+void irlmp_do_discovery(int nslots);
+discovery_t *irlmp_get_discovery_response(void);
+void irlmp_discovery_expiry(discinfo_t *expiry, int number);
+
+int  irlmp_data_request(struct lsap_cb *, struct sk_buff *);
+void irlmp_data_indication(struct lsap_cb *, struct sk_buff *);
+
+int  irlmp_udata_request(struct lsap_cb *, struct sk_buff *);
+void irlmp_udata_indication(struct lsap_cb *, struct sk_buff *);
+
+#ifdef CONFIG_IRDA_ULTRA
+int  irlmp_connless_data_request(struct lsap_cb *, struct sk_buff *, __u8);
+void irlmp_connless_data_indication(struct lsap_cb *, struct sk_buff *);
+#endif /* CONFIG_IRDA_ULTRA */
+
+void irlmp_status_indication(struct lap_cb *, LINK_STATUS link, LOCK_STATUS lock);
+void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow);
+
+LM_REASON irlmp_convert_lap_reason(LAP_REASON);
+
+static inline __u32 irlmp_get_saddr(const struct lsap_cb *self)
+{
+	return (self && self->lap) ? self->lap->saddr : 0;
+}
+
+static inline __u32 irlmp_get_daddr(const struct lsap_cb *self)
+{
+	return (self && self->lap) ? self->lap->daddr : 0;
+}
+
+const char *irlmp_reason_str(LM_REASON reason);
+
+extern int sysctl_discovery_timeout;
+extern int sysctl_discovery_slots;
+extern int sysctl_discovery;
+extern int sysctl_lap_keepalive_time;	/* in ms, default is LM_IDLE_TIMEOUT */
+extern struct irlmp_cb *irlmp;
+
+/* Check if LAP queue is full.
+ * Used by IrTTP for low control, see comments in irlap.h - Jean II */
+static inline int irlmp_lap_tx_queue_full(struct lsap_cb *self)
+{
+	if (self == NULL)
+		return 0;
+	if (self->lap == NULL)
+		return 0;
+	if (self->lap->irlap == NULL)
+		return 0;
+
+	return IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD;
+}
+
+/* After doing a irlmp_dup(), this get one of the two socket back into
+ * a state where it's waiting incoming connections.
+ * Note : this can be used *only* if the socket is not yet connected
+ * (i.e. NO irlmp_connect_response() done on this socket).
+ * - Jean II */
+static inline void irlmp_listen(struct lsap_cb *self)
+{
+	self->dlsap_sel = LSAP_ANY;
+	self->lap = NULL;
+	self->lsap_state = LSAP_DISCONNECTED;
+	/* Started when we received the LM_CONNECT_INDICATION */
+	del_timer(&self->watchdog_timer);
+}
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irlmp_event.h b/drivers/staging/irda/include/net/irda/irlmp_event.h
new file mode 100644
index 000000000000..9e4ec17a7449
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlmp_event.h
@@ -0,0 +1,98 @@
+/*********************************************************************
+ *                
+ * Filename:      irlmp_event.h
+ * Version:       0.1
+ * Description:   IrDA-LMP event handling
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Aug  4 20:40:53 1997
+ * Modified at:   Thu Jul  8 12:18:54 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRLMP_EVENT_H
+#define IRLMP_EVENT_H
+
+/* A few forward declarations (to make compiler happy) */
+struct irlmp_cb;
+struct lsap_cb;
+struct lap_cb;
+struct discovery_t;
+
+/* LAP states */
+typedef enum {
+	/* IrLAP connection control states */
+	LAP_STANDBY,             /* No LAP connection */
+	LAP_U_CONNECT,           /* Starting LAP connection */
+	LAP_ACTIVE,              /* LAP connection is active */
+} IRLMP_STATE;
+
+/* LSAP connection control states */
+typedef enum {
+	LSAP_DISCONNECTED,        /* No LSAP connection */
+	LSAP_CONNECT,             /* Connect indication from peer */
+	LSAP_CONNECT_PEND,        /* Connect request from service user */
+	LSAP_DATA_TRANSFER_READY, /* LSAP connection established */          
+	LSAP_SETUP,               /* Trying to set up LSAP connection */
+	LSAP_SETUP_PEND,          /* Request to start LAP connection */
+} LSAP_STATE;
+
+typedef enum {
+	/* LSAP events */
+ 	LM_CONNECT_REQUEST,
+ 	LM_CONNECT_CONFIRM,
+	LM_CONNECT_RESPONSE,
+ 	LM_CONNECT_INDICATION, 	
+	
+	LM_DISCONNECT_INDICATION,
+	LM_DISCONNECT_REQUEST,
+
+ 	LM_DATA_REQUEST,
+	LM_UDATA_REQUEST,
+ 	LM_DATA_INDICATION,
+	LM_UDATA_INDICATION,
+
+	LM_WATCHDOG_TIMEOUT,
+
+	/* IrLAP events */
+	LM_LAP_CONNECT_REQUEST,
+ 	LM_LAP_CONNECT_INDICATION, 
+ 	LM_LAP_CONNECT_CONFIRM,
+ 	LM_LAP_DISCONNECT_INDICATION, 
+	LM_LAP_DISCONNECT_REQUEST,
+	LM_LAP_DISCOVERY_REQUEST,
+ 	LM_LAP_DISCOVERY_CONFIRM,
+	LM_LAP_IDLE_TIMEOUT,
+} IRLMP_EVENT;
+
+extern const char *const irlmp_state[];
+extern const char *const irlsap_state[];
+
+void irlmp_watchdog_timer_expired(void *data);
+void irlmp_discovery_timer_expired(void *data);
+void irlmp_idle_timer_expired(void *data);
+
+void irlmp_do_lap_event(struct lap_cb *self, IRLMP_EVENT event, 
+			struct sk_buff *skb);
+int irlmp_do_lsap_event(struct lsap_cb *self, IRLMP_EVENT event, 
+			struct sk_buff *skb);
+
+#endif /* IRLMP_EVENT_H */
+
+
+
+
diff --git a/drivers/staging/irda/include/net/irda/irlmp_frame.h b/drivers/staging/irda/include/net/irda/irlmp_frame.h
new file mode 100644
index 000000000000..1906eb71422e
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irlmp_frame.h
@@ -0,0 +1,62 @@
+/*********************************************************************
+ *                
+ * Filename:      irlmp_frame.h
+ * Version:       0.9
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Aug 19 02:09:59 1997
+ * Modified at:   Fri Dec 10 13:21:53 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRMLP_FRAME_H
+#define IRMLP_FRAME_H
+
+#include <linux/skbuff.h>
+
+#include <net/irda/discovery.h>
+
+/* IrLMP frame opcodes */
+#define CONNECT_CMD    0x01
+#define CONNECT_CNF    0x81
+#define DISCONNECT     0x02
+#define ACCESSMODE_CMD 0x03
+#define ACCESSMODE_CNF 0x83
+
+#define CONTROL_BIT    0x80
+
+void irlmp_send_data_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap,
+				int expedited, struct sk_buff *skb);
+void irlmp_send_lcf_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, 
+			__u8 opcode, struct sk_buff *skb);
+void irlmp_link_data_indication(struct lap_cb *, struct sk_buff *, 
+				int unreliable);
+#ifdef CONFIG_IRDA_ULTRA
+void irlmp_link_unitdata_indication(struct lap_cb *, struct sk_buff *);
+#endif /* CONFIG_IRDA_ULTRA */
+
+void irlmp_link_connect_indication(struct lap_cb *, __u32 saddr, __u32 daddr,
+				   struct qos_info *qos, struct sk_buff *skb);
+void irlmp_link_connect_request(__u32 daddr);
+void irlmp_link_connect_confirm(struct lap_cb *self, struct qos_info *qos, 
+				struct sk_buff *skb);
+void irlmp_link_disconnect_indication(struct lap_cb *, struct irlap_cb *, 
+				      LAP_REASON reason, struct sk_buff *); 
+void irlmp_link_discovery_confirm(struct lap_cb *self, hashbin_t *log);
+void irlmp_link_discovery_indication(struct lap_cb *, discovery_t *discovery);
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irmod.h b/drivers/staging/irda/include/net/irda/irmod.h
new file mode 100644
index 000000000000..86f0dbb8ee5d
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irmod.h
@@ -0,0 +1,109 @@
+/*********************************************************************
+ *                
+ * Filename:      irmod.h
+ * Version:       0.3
+ * Description:   IrDA module and utilities functions
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Dec 15 13:58:52 1997
+ * Modified at:   Fri Jan 28 13:15:24 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ *
+ *     Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *      
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charg.
+ *     
+ ********************************************************************/
+
+#ifndef IRMOD_H
+#define IRMOD_H
+
+/* Misc status information */
+typedef enum {
+	STATUS_OK,
+	STATUS_ABORTED,
+	STATUS_NO_ACTIVITY,
+	STATUS_NOISY,
+	STATUS_REMOTE,
+} LINK_STATUS;
+
+typedef enum {
+	LOCK_NO_CHANGE,
+	LOCK_LOCKED,
+	LOCK_UNLOCKED,
+} LOCK_STATUS;
+
+typedef enum { FLOW_STOP, FLOW_START } LOCAL_FLOW;
+
+/*  
+ *  IrLMP disconnect reasons. The order is very important, since they 
+ *  correspond to disconnect reasons sent in IrLMP disconnect frames, so
+ *  please do not touch :-)
+ */
+typedef enum {
+	LM_USER_REQUEST = 1,  /* User request */
+	LM_LAP_DISCONNECT,    /* Unexpected IrLAP disconnect */
+	LM_CONNECT_FAILURE,   /* Failed to establish IrLAP connection */
+	LM_LAP_RESET,         /* IrLAP reset */
+	LM_INIT_DISCONNECT,   /* Link Management initiated disconnect */
+	LM_LSAP_NOTCONN,      /* Data delivered on unconnected LSAP */
+	LM_NON_RESP_CLIENT,   /* Non responsive LM-MUX client */
+	LM_NO_AVAIL_CLIENT,   /* No available LM-MUX client */
+	LM_CONN_HALF_OPEN,    /* Connection is half open */
+	LM_BAD_SOURCE_ADDR,   /* Illegal source address (i.e 0x00) */
+} LM_REASON;
+#define LM_UNKNOWN 0xff       /* Unspecified disconnect reason */
+
+/* A few forward declarations (to make compiler happy) */
+struct qos_info;		/* in <net/irda/qos.h> */
+
+/*
+ *  Notify structure used between transport and link management layers
+ */
+typedef struct {
+	int (*data_indication)(void *priv, void *sap, struct sk_buff *skb);
+	int (*udata_indication)(void *priv, void *sap, struct sk_buff *skb);
+	void (*connect_confirm)(void *instance, void *sap, 
+				struct qos_info *qos, __u32 max_sdu_size,
+				__u8 max_header_size, struct sk_buff *skb);
+	void (*connect_indication)(void *instance, void *sap, 
+				   struct qos_info *qos, __u32 max_sdu_size, 
+				   __u8 max_header_size, struct sk_buff *skb);
+	void (*disconnect_indication)(void *instance, void *sap, 
+				      LM_REASON reason, struct sk_buff *);
+	void (*flow_indication)(void *instance, void *sap, LOCAL_FLOW flow);
+	void (*status_indication)(void *instance,
+				  LINK_STATUS link, LOCK_STATUS lock);
+	void *instance; /* Layer instance pointer */
+	char name[16];  /* Name of layer */
+} notify_t;
+
+#define NOTIFY_MAX_NAME 16
+
+/* Zero the notify structure */
+void irda_notify_init(notify_t *notify);
+
+/* Locking wrapper - Note the inverted logic on irda_lock().
+ * Those function basically return false if the lock is already in the
+ * position you want to set it. - Jean II */
+#define irda_lock(lock)		(! test_and_set_bit(0, (void *) (lock)))
+#define irda_unlock(lock)	(test_and_clear_bit(0, (void *) (lock)))
+
+#endif /* IRMOD_H */
+
+
+
+
+
+
+
+
+
diff --git a/drivers/staging/irda/include/net/irda/irqueue.h b/drivers/staging/irda/include/net/irda/irqueue.h
new file mode 100644
index 000000000000..37f512bd6733
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irqueue.h
@@ -0,0 +1,96 @@
+/*********************************************************************
+ *                
+ * Filename:      irqueue.h
+ * Version:       0.3
+ * Description:   General queue implementation
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Tue Jun  9 13:26:50 1998
+ * Modified at:   Thu Oct  7 13:25:16 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (C) 1998-1999, Aage Kvalnes <aage@cs.uit.no>
+ *     Copyright (c) 1998, Dag Brattli
+ *     All Rights Reserved.
+ *      
+ *     This code is taken from the Vortex Operating System written by Aage
+ *     Kvalnes and has been ported to Linux and Linux/IR by Dag Brattli
+ *
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *  
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *     
+ ********************************************************************/
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+
+#ifndef IRDA_QUEUE_H
+#define IRDA_QUEUE_H
+
+#define NAME_SIZE      32
+
+/*
+ * Hash types (some flags can be xored)
+ * See comments in irqueue.c for which one to use...
+ */
+#define HB_NOLOCK	0	/* No concurent access prevention */
+#define HB_LOCK		1	/* Prevent concurent write with global lock */
+
+/*
+ * Hash defines
+ */
+#define HASHBIN_SIZE   8
+#define HASHBIN_MASK   0x7
+
+#ifndef IRDA_ALIGN 
+#define IRDA_ALIGN __attribute__((aligned))
+#endif
+
+#define Q_NULL { NULL, NULL, "", 0 }
+
+typedef void (*FREE_FUNC)(void *arg);
+
+struct irda_queue {
+	struct irda_queue *q_next;
+	struct irda_queue *q_prev;
+
+	char   q_name[NAME_SIZE];
+	long   q_hash;			/* Must be able to cast a (void *) */
+};
+typedef struct irda_queue irda_queue_t;
+
+typedef struct hashbin_t {
+	__u32      magic;
+	int        hb_type;
+	int        hb_size;
+	spinlock_t hb_spinlock;		/* HB_LOCK - Can be used by the user */
+
+	irda_queue_t* hb_queue[HASHBIN_SIZE] IRDA_ALIGN;
+
+	irda_queue_t* hb_current;
+} hashbin_t;
+
+hashbin_t *hashbin_new(int type);
+int      hashbin_delete(hashbin_t* hashbin, FREE_FUNC func);
+int      hashbin_clear(hashbin_t* hashbin, FREE_FUNC free_func);
+void     hashbin_insert(hashbin_t* hashbin, irda_queue_t* entry, long hashv, 
+			const char* name);
+void*    hashbin_remove(hashbin_t* hashbin, long hashv, const char* name);
+void*    hashbin_remove_first(hashbin_t *hashbin);
+void*	 hashbin_remove_this( hashbin_t* hashbin, irda_queue_t* entry);
+void*    hashbin_find(hashbin_t* hashbin, long hashv, const char* name);
+void*    hashbin_lock_find(hashbin_t* hashbin, long hashv, const char* name);
+void*    hashbin_find_next(hashbin_t* hashbin, long hashv, const char* name,
+			   void ** pnext);
+irda_queue_t *hashbin_get_first(hashbin_t *hashbin);
+irda_queue_t *hashbin_get_next(hashbin_t *hashbin);
+
+#define HASHBIN_GET_SIZE(hashbin) hashbin->hb_size
+
+#endif
diff --git a/drivers/staging/irda/include/net/irda/irttp.h b/drivers/staging/irda/include/net/irda/irttp.h
new file mode 100644
index 000000000000..98682d4bae8f
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/irttp.h
@@ -0,0 +1,210 @@
+/*********************************************************************
+ *                
+ * Filename:      irttp.h
+ * Version:       1.0
+ * Description:   Tiny Transport Protocol (TTP) definitions
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sun Aug 31 20:14:31 1997
+ * Modified at:   Sun Dec 12 13:09:07 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef IRTTP_H
+#define IRTTP_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+
+#include <net/irda/irda.h>
+#include <net/irda/irlmp.h>		/* struct lsap_cb */
+#include <net/irda/qos.h>		/* struct qos_info */
+#include <net/irda/irqueue.h>
+
+#define TTP_MAX_CONNECTIONS    LM_MAX_CONNECTIONS
+#define TTP_HEADER             1
+#define TTP_MAX_HEADER         (TTP_HEADER + LMP_MAX_HEADER)
+#define TTP_SAR_HEADER         5
+#define TTP_PARAMETERS         0x80
+#define TTP_MORE               0x80
+
+/* Transmission queue sizes */
+/* Worst case scenario, two window of data - Jean II */
+#define TTP_TX_MAX_QUEUE	14
+/* We need to keep at least 5 frames to make sure that we can refill
+ * appropriately the LAP layer. LAP keeps only two buffers, and we need
+ * to have 7 to make a full window - Jean II */
+#define TTP_TX_LOW_THRESHOLD	5
+/* Most clients are synchronous with respect to flow control, so we can
+ * keep a low number of Tx buffers in TTP - Jean II */
+#define TTP_TX_HIGH_THRESHOLD	7
+
+/* Receive queue sizes */
+/* Minimum of credit that the peer should hold.
+ * If the peer has less credits than 9 frames, we will explicitly send
+ * him some credits (through irttp_give_credit() and a specific frame).
+ * Note that when we give credits it's likely that it won't be sent in
+ * this LAP window, but in the next one. So, we make sure that the peer
+ * has something to send while waiting for credits (one LAP window == 7
+ * + 1 frames while he process the credits). - Jean II */
+#define TTP_RX_MIN_CREDIT	8
+/* This is the default maximum number of credits held by the peer, so the
+ * default maximum number of frames he can send us before needing flow
+ * control answer from us (this may be negociated differently at TSAP setup).
+ * We want to minimise the number of times we have to explicitly send some
+ * credit to the peer, hoping we can piggyback it on the return data. In
+ * particular, it doesn't make sense for us to send credit more than once
+ * per LAP window.
+ * Moreover, giving credits has some latency, so we need strictly more than
+ * a LAP window, otherwise we may already have credits in our Tx queue.
+ * But on the other hand, we don't want to keep too many Rx buffer here
+ * before starting to flow control the other end, so make it exactly one
+ * LAP window + 1 + MIN_CREDITS. - Jean II */
+#define TTP_RX_DEFAULT_CREDIT	16
+/* Maximum number of credits we can allow the peer to have, and therefore
+ * maximum Rx queue size.
+ * Note that we try to deliver packets to the higher layer every time we
+ * receive something, so in normal mode the Rx queue will never contains
+ * more than one or two packets. - Jean II */
+#define TTP_RX_MAX_CREDIT	21
+
+/* What clients should use when calling ttp_open_tsap() */
+#define DEFAULT_INITIAL_CREDIT	TTP_RX_DEFAULT_CREDIT
+
+/* Some priorities for disconnect requests */
+#define P_NORMAL    0
+#define P_HIGH      1
+
+#define TTP_SAR_DISABLE 0
+#define TTP_SAR_UNBOUND 0xffffffff
+
+/* Parameters */
+#define TTP_MAX_SDU_SIZE 0x01
+
+/*
+ *  This structure contains all data associated with one instance of a TTP 
+ *  connection.
+ */
+struct tsap_cb {
+	irda_queue_t q;            /* Must be first */
+	magic_t magic;        /* Just in case */
+
+	__u8 stsap_sel;       /* Source TSAP */
+	__u8 dtsap_sel;       /* Destination TSAP */
+
+	struct lsap_cb *lsap; /* Corresponding LSAP to this TSAP */
+
+	__u8 connected;       /* TSAP connected */
+	 
+	__u8 initial_credit;  /* Initial credit to give peer */
+
+        int avail_credit;    /* Available credit to return to peer */
+	int remote_credit;   /* Credit held by peer TTP entity */
+	int send_credit;     /* Credit held by local TTP entity */
+	
+	struct sk_buff_head tx_queue; /* Frames to be transmitted */
+	struct sk_buff_head rx_queue; /* Received frames */
+	struct sk_buff_head rx_fragments;
+	int tx_queue_lock;
+	int rx_queue_lock;
+	spinlock_t lock;
+
+	notify_t notify;       /* Callbacks to client layer */
+
+	struct net_device_stats stats;
+	struct timer_list todo_timer; 
+
+	__u32 max_seg_size;     /* Max data that fit into an IrLAP frame */
+	__u8  max_header_size;
+
+	int   rx_sdu_busy;     /* RxSdu.busy */
+	__u32 rx_sdu_size;     /* Current size of a partially received frame */
+	__u32 rx_max_sdu_size; /* Max receive user data size */
+
+	int tx_sdu_busy;       /* TxSdu.busy */
+	__u32 tx_max_sdu_size; /* Max transmit user data size */
+
+	int close_pend;        /* Close, but disconnect_pend */
+	unsigned long disconnect_pend; /* Disconnect, but still data to send */
+	struct sk_buff *disconnect_skb;
+};
+
+struct irttp_cb {
+	magic_t    magic;	
+	hashbin_t *tsaps;
+};
+
+int  irttp_init(void);
+void irttp_cleanup(void);
+
+struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify);
+int irttp_close_tsap(struct tsap_cb *self);
+
+int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb);
+int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb);
+
+int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, 
+			  __u32 saddr, __u32 daddr,
+			  struct qos_info *qos, __u32 max_sdu_size, 
+			  struct sk_buff *userdata);
+int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, 
+			    struct sk_buff *userdata);
+int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *skb,
+			     int priority);
+void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow);
+struct tsap_cb *irttp_dup(struct tsap_cb *self, void *instance);
+
+static inline __u32 irttp_get_saddr(struct tsap_cb *self)
+{
+	return irlmp_get_saddr(self->lsap);
+}
+
+static inline __u32 irttp_get_daddr(struct tsap_cb *self)
+{
+	return irlmp_get_daddr(self->lsap);
+}
+
+static inline __u32 irttp_get_max_seg_size(struct tsap_cb *self)
+{
+	return self->max_seg_size;
+}
+
+/* After doing a irttp_dup(), this get one of the two socket back into
+ * a state where it's waiting incoming connections.
+ * Note : this can be used *only* if the socket is not yet connected
+ * (i.e. NO irttp_connect_response() done on this socket).
+ * - Jean II */
+static inline void irttp_listen(struct tsap_cb *self)
+{
+	irlmp_listen(self->lsap);
+	self->dtsap_sel = LSAP_ANY;
+}
+
+/* Return TRUE if the node is in primary mode (i.e. master)
+ * - Jean II */
+static inline int irttp_is_primary(struct tsap_cb *self)
+{
+	if ((self == NULL) ||
+	    (self->lsap == NULL) ||
+	    (self->lsap->lap == NULL) ||
+	    (self->lsap->lap->irlap == NULL))
+		return -2;
+	return irlap_is_primary(self->lsap->lap->irlap);
+}
+
+#endif /* IRTTP_H */
diff --git a/drivers/staging/irda/include/net/irda/parameters.h b/drivers/staging/irda/include/net/irda/parameters.h
new file mode 100644
index 000000000000..2d9cd0007cba
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/parameters.h
@@ -0,0 +1,100 @@
+/*********************************************************************
+ *                
+ * Filename:      parameters.h
+ * Version:       1.0
+ * Description:   A more general way to handle (pi,pl,pv) parameters
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Jun  7 08:47:28 1999
+ * Modified at:   Sun Jan 30 14:05:14 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *     Michel Dänzer <daenzer@debian.org>, 10/2001
+ *     - simplify irda_pv_t to avoid endianness issues
+ *     
+ ********************************************************************/
+
+#ifndef IRDA_PARAMS_H
+#define IRDA_PARAMS_H
+
+/*
+ *  The currently supported types. Beware not to change the sequence since
+ *  it a good reason why the sized integers has a value equal to their size
+ */
+typedef enum {
+	PV_INTEGER,      /* Integer of any (pl) length */
+	PV_INT_8_BITS,   /* Integer of 8 bits in length */
+	PV_INT_16_BITS,  /* Integer of 16 bits in length */
+	PV_STRING,       /* \0 terminated string */
+	PV_INT_32_BITS,  /* Integer of 32 bits in length */
+	PV_OCT_SEQ,      /* Octet sequence */
+	PV_NO_VALUE      /* Does not contain any value (pl=0) */
+} PV_TYPE;
+
+/* Bit 7 of type field */
+#define PV_BIG_ENDIAN    0x80 
+#define PV_LITTLE_ENDIAN 0x00
+#define PV_MASK          0x7f   /* To mask away endian bit */
+
+#define PV_PUT 0
+#define PV_GET 1
+
+typedef union {
+	char   *c;
+	__u32   i;
+	__u32 *ip;
+} irda_pv_t;
+
+typedef struct {
+	__u8 pi;
+	__u8 pl;
+	irda_pv_t pv;
+} irda_param_t;
+
+typedef int (*PI_HANDLER)(void *self, irda_param_t *param, int get);
+typedef int (*PV_HANDLER)(void *self, __u8 *buf, int len, __u8 pi,
+			  PV_TYPE type, PI_HANDLER func);
+
+typedef struct {
+	const PI_HANDLER func;  /* Handler for this parameter identifier */
+	PV_TYPE    type;  /* Data type for this parameter */
+} pi_minor_info_t;
+
+typedef struct {
+	const pi_minor_info_t *pi_minor_call_table;
+	int len;
+} pi_major_info_t;
+
+typedef struct {
+	const pi_major_info_t *tables;
+	int              len;
+	__u8             pi_mask;
+	int              pi_major_offset;
+} pi_param_info_t;
+
+int irda_param_pack(__u8 *buf, char *fmt, ...);
+
+int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len, 
+		      pi_param_info_t *info);
+int irda_param_extract_all(void *self, __u8 *buf, int len, 
+			   pi_param_info_t *info);
+
+#define irda_param_insert_byte(buf,pi,pv) irda_param_pack(buf,"bbb",pi,1,pv)
+
+#endif /* IRDA_PARAMS_H */
+
diff --git a/drivers/staging/irda/include/net/irda/qos.h b/drivers/staging/irda/include/net/irda/qos.h
new file mode 100644
index 000000000000..05a5a249956f
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/qos.h
@@ -0,0 +1,101 @@
+/*********************************************************************
+ *                
+ * Filename:      qos.h
+ * Version:       1.0
+ * Description:   Quality of Service definitions
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Fri Sep 19 23:21:09 1997
+ * Modified at:   Thu Dec  2 13:51:54 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ * 
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *     GNU General Public License for more details.
+ * 
+ *     You should have received a copy of the GNU General Public License 
+ *     along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *     
+ ********************************************************************/
+
+#ifndef IRDA_QOS_H
+#define IRDA_QOS_H
+
+#include <linux/skbuff.h>
+
+#include <net/irda/parameters.h>
+
+#define PI_BAUD_RATE     0x01
+#define PI_MAX_TURN_TIME 0x82
+#define PI_DATA_SIZE     0x83
+#define PI_WINDOW_SIZE   0x84
+#define PI_ADD_BOFS      0x85
+#define PI_MIN_TURN_TIME 0x86
+#define PI_LINK_DISC     0x08
+
+#define IR_115200_MAX 0x3f
+
+/* Baud rates (first byte) */
+#define IR_2400     0x01
+#define IR_9600     0x02
+#define IR_19200    0x04
+#define IR_38400    0x08
+#define IR_57600    0x10
+#define IR_115200   0x20
+#define IR_576000   0x40
+#define IR_1152000  0x80
+
+/* Baud rates (second byte) */
+#define IR_4000000  0x01
+#define IR_16000000 0x02
+
+/* Quality of Service information */
+typedef struct {
+	__u32 value;
+	__u16 bits; /* LSB is first byte, MSB is second byte */
+} qos_value_t;
+
+struct qos_info {
+	magic_t magic;
+
+	qos_value_t baud_rate;       /* IR_11520O | ... */
+	qos_value_t max_turn_time;
+	qos_value_t data_size;
+	qos_value_t window_size;
+	qos_value_t additional_bofs;
+	qos_value_t min_turn_time;
+	qos_value_t link_disc_time;
+	
+	qos_value_t power;
+};
+
+extern int sysctl_max_baud_rate;
+extern int sysctl_max_inactive_time;
+
+void irda_init_max_qos_capabilies(struct qos_info *qos);
+void irda_qos_compute_intersection(struct qos_info *, struct qos_info *);
+
+__u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time);
+
+void irda_qos_bits_to_value(struct qos_info *qos);
+
+/* So simple, how could we not inline those two ?
+ * Note : one byte is 10 bits if you include start and stop bits
+ * Jean II */
+#define irlap_min_turn_time_in_bytes(speed, min_turn_time) (	\
+	speed * min_turn_time / 10000000			\
+)
+#define irlap_xbofs_in_usec(speed, xbofs) (			\
+	xbofs * 10000000 / speed				\
+)
+
+#endif
+
diff --git a/drivers/staging/irda/include/net/irda/timer.h b/drivers/staging/irda/include/net/irda/timer.h
new file mode 100644
index 000000000000..d784f242cf7b
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/timer.h
@@ -0,0 +1,105 @@
+/*********************************************************************
+ *                
+ * Filename:      timer.h
+ * Version:       
+ * Description:   
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Sat Aug 16 00:59:29 1997
+ * Modified at:   Thu Oct  7 12:25:24 1999
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1997, 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef TIMER_H
+#define TIMER_H
+
+#include <linux/timer.h>
+#include <linux/jiffies.h>
+
+#include <asm/param.h>  /* for HZ */
+
+#include <net/irda/irda.h>
+
+/* A few forward declarations (to make compiler happy) */
+struct irlmp_cb;
+struct irlap_cb;
+struct lsap_cb;
+struct lap_cb;
+
+/* 
+ *  Timeout definitions, some defined in IrLAP 6.13.5 - p. 92
+ */
+#define POLL_TIMEOUT        (450*HZ/1000)    /* Must never exceed 500 ms */
+#define FINAL_TIMEOUT       (500*HZ/1000)    /* Must never exceed 500 ms */
+
+/* 
+ *  Normally twice of p-timer. Note 3, IrLAP 6.3.11.2 - p. 60 suggests
+ *  at least twice duration of the P-timer.
+ */
+#define WD_TIMEOUT          (POLL_TIMEOUT*2)
+
+#define MEDIABUSY_TIMEOUT   (500*HZ/1000)    /* 500 msec */
+#define SMALLBUSY_TIMEOUT   (100*HZ/1000)    /* 100 msec - IrLAP 6.13.4 */
+
+/*
+ *  Slot timer must never exceed 85 ms, and must always be at least 25 ms, 
+ *  suggested to  75-85 msec by IrDA lite. This doesn't work with a lot of
+ *  devices, and other stackes uses a lot more, so it's best we do it as well
+ *  (Note : this is the default value and sysctl overrides it - Jean II)
+ */
+#define SLOT_TIMEOUT            (90*HZ/1000)
+
+/* 
+ *  The latest discovery frame (XID) is longer due to the extra discovery
+ *  information (hints, device name...). This is its extra length.
+ *  We use that when setting the query timeout. Jean II
+ */
+#define XIDEXTRA_TIMEOUT        (34*HZ/1000)  /* 34 msec */
+
+#define WATCHDOG_TIMEOUT        (20*HZ)       /* 20 sec */
+
+typedef void (*TIMER_CALLBACK)(void *);
+
+static inline void irda_start_timer(struct timer_list *ptimer, int timeout, 
+				    void* data, TIMER_CALLBACK callback)
+{
+	ptimer->function = (void (*)(unsigned long)) callback;
+	ptimer->data = (unsigned long) data;
+	
+	/* Set new value for timer (update or add timer).
+	 * We use mod_timer() because it's more efficient and also
+	 * safer with respect to race conditions - Jean II */
+	mod_timer(ptimer, jiffies + timeout);
+}
+
+
+void irlap_start_slot_timer(struct irlap_cb *self, int timeout);
+void irlap_start_query_timer(struct irlap_cb *self, int S, int s);
+void irlap_start_final_timer(struct irlap_cb *self, int timeout);
+void irlap_start_wd_timer(struct irlap_cb *self, int timeout);
+void irlap_start_backoff_timer(struct irlap_cb *self, int timeout);
+
+void irlap_start_mbusy_timer(struct irlap_cb *self, int timeout);
+void irlap_stop_mbusy_timer(struct irlap_cb *);
+
+void irlmp_start_watchdog_timer(struct lsap_cb *, int timeout);
+void irlmp_start_discovery_timer(struct irlmp_cb *, int timeout);
+void irlmp_start_idle_timer(struct lap_cb *, int timeout);
+void irlmp_stop_idle_timer(struct lap_cb *self);
+
+#endif
+
diff --git a/drivers/staging/irda/include/net/irda/wrapper.h b/drivers/staging/irda/include/net/irda/wrapper.h
new file mode 100644
index 000000000000..eef53ebe3d76
--- /dev/null
+++ b/drivers/staging/irda/include/net/irda/wrapper.h
@@ -0,0 +1,58 @@
+/*********************************************************************
+ *                
+ * Filename:      wrapper.h
+ * Version:       1.2
+ * Description:   IrDA SIR async wrapper layer
+ * Status:        Experimental.
+ * Author:        Dag Brattli <dagb@cs.uit.no>
+ * Created at:    Mon Aug  4 20:40:53 1997
+ * Modified at:   Tue Jan 11 12:37:29 2000
+ * Modified by:   Dag Brattli <dagb@cs.uit.no>
+ * 
+ *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>, 
+ *     All Rights Reserved.
+ *     
+ *     This program is free software; you can redistribute it and/or 
+ *     modify it under the terms of the GNU General Public License as 
+ *     published by the Free Software Foundation; either version 2 of 
+ *     the License, or (at your option) any later version.
+ *
+ *     Neither Dag Brattli nor University of Tromsø admit liability nor
+ *     provide warranty for any of this software. This material is 
+ *     provided "AS-IS" and at no charge.
+ *
+ ********************************************************************/
+
+#ifndef WRAPPER_H
+#define WRAPPER_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+#include <net/irda/irda_device.h>	/* iobuff_t */
+
+#define BOF  0xc0 /* Beginning of frame */
+#define XBOF 0xff
+#define EOF  0xc1 /* End of frame */
+#define CE   0x7d /* Control escape */
+
+#define STA BOF  /* Start flag */
+#define STO EOF  /* End flag */
+
+#define IRDA_TRANS 0x20    /* Asynchronous transparency modifier */       
+
+/* States for receiving a frame in async mode */
+enum {
+	OUTSIDE_FRAME, 
+	BEGIN_FRAME, 
+	LINK_ESCAPE, 
+	INSIDE_FRAME
+};
+
+/* Proto definitions */
+int async_wrap_skb(struct sk_buff *skb, __u8 *tx_buff, int buffsize);
+void async_unwrap_char(struct net_device *dev, struct net_device_stats *stats,
+		       iobuff_t *buf, __u8 byte);
+
+#endif
diff --git a/drivers/staging/irda/net/Makefile b/drivers/staging/irda/net/Makefile
index 187f6c563a4b..bd1a635b88cf 100644
--- a/drivers/staging/irda/net/Makefile
+++ b/drivers/staging/irda/net/Makefile
@@ -2,6 +2,8 @@
 # Makefile for the Linux IrDA protocol layer.
 #
 
+subdir-ccflags-y += -I$(srctree)/drivers/staging/irda/include
+
 obj-$(CONFIG_IRDA) += irda.o
 obj-$(CONFIG_IRLAN) += irlan/
 obj-$(CONFIG_IRNET) += irnet/
diff --git a/include/net/irda/af_irda.h b/include/net/irda/af_irda.h
deleted file mode 100644
index 0df574931522..000000000000
--- a/include/net/irda/af_irda.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      af_irda.h
- * Version:       1.0
- * Description:   IrDA sockets declarations
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Dec  9 21:13:12 1997
- * Modified at:   Fri Jan 28 13:16:32 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *      
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *  
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *     
- ********************************************************************/
-
-#ifndef AF_IRDA_H
-#define AF_IRDA_H
-
-#include <linux/irda.h>
-#include <net/irda/irda.h>
-#include <net/irda/iriap.h>		/* struct iriap_cb */
-#include <net/irda/irias_object.h>	/* struct ias_value */
-#include <net/irda/irlmp.h>		/* struct lsap_cb */
-#include <net/irda/irttp.h>		/* struct tsap_cb */
-#include <net/irda/discovery.h>		/* struct discovery_t */
-#include <net/sock.h>
-
-/* IrDA Socket */
-struct irda_sock {
-	/* struct sock has to be the first member of irda_sock */
-	struct sock sk;
-	__u32 saddr;          /* my local address */
-	__u32 daddr;          /* peer address */
-
-	struct lsap_cb *lsap; /* LSAP used by Ultra */
-	__u8  pid;            /* Protocol IP (PID) used by Ultra */
-
-	struct tsap_cb *tsap; /* TSAP used by this connection */
-	__u8 dtsap_sel;       /* remote TSAP address */
-	__u8 stsap_sel;       /* local TSAP address */
-	
-	__u32 max_sdu_size_rx;
-	__u32 max_sdu_size_tx;
-	__u32 max_data_size;
-	__u8  max_header_size;
-	struct qos_info qos_tx;
-
-	__u16_host_order mask;           /* Hint bits mask */
-	__u16_host_order hints;          /* Hint bits */
-
-	void *ckey;           /* IrLMP client handle */
-	void *skey;           /* IrLMP service handle */
-
-	struct ias_object *ias_obj;   /* Our service name + lsap in IAS */
-	struct iriap_cb *iriap;	      /* Used to query remote IAS */
-	struct ias_value *ias_result; /* Result of remote IAS query */
-
-	hashbin_t *cachelog;		/* Result of discovery query */
-	__u32 cachedaddr;	/* Result of selective discovery query */
-
-	int nslots;           /* Number of slots to use for discovery */
-
-	int errno;            /* status of the IAS query */
-
-	wait_queue_head_t query_wait;	/* Wait for the answer to a query */
-	struct timer_list watchdog;	/* Timeout for discovery */
-
-	LOCAL_FLOW tx_flow;
-	LOCAL_FLOW rx_flow;
-};
-
-static inline struct irda_sock *irda_sk(struct sock *sk)
-{
-	return (struct irda_sock *)sk;
-}
-
-#endif /* AF_IRDA_H */
diff --git a/include/net/irda/crc.h b/include/net/irda/crc.h
deleted file mode 100644
index f202296df9bb..000000000000
--- a/include/net/irda/crc.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      crc.h
- * Version:       
- * Description:   CRC routines
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Aug  4 20:40:53 1997
- * Modified at:   Sun May  2 20:25:23 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- ********************************************************************/
-
-#ifndef IRDA_CRC_H
-#define IRDA_CRC_H
-
-#include <linux/types.h>
-#include <linux/crc-ccitt.h>
-
-#define INIT_FCS  0xffff   /* Initial FCS value */
-#define GOOD_FCS  0xf0b8   /* Good final FCS value */
-
-/* Recompute the FCS with one more character appended. */
-#define irda_fcs(fcs, c) crc_ccitt_byte(fcs, c)
-
-/* Recompute the FCS with len bytes appended. */
-#define irda_calc_crc16(fcs, buf, len) crc_ccitt(fcs, buf, len)
-
-#endif
diff --git a/include/net/irda/discovery.h b/include/net/irda/discovery.h
deleted file mode 100644
index 63ae32530567..000000000000
--- a/include/net/irda/discovery.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      discovery.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Apr  6 16:53:53 1999
- * Modified at:   Tue Oct  5 10:05:10 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef DISCOVERY_H
-#define DISCOVERY_H
-
-#include <asm/param.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irqueue.h>		/* irda_queue_t */
-#include <net/irda/irlap_event.h>	/* LAP_REASON */
-
-#define DISCOVERY_EXPIRE_TIMEOUT (2*sysctl_discovery_timeout*HZ)
-#define DISCOVERY_DEFAULT_SLOTS  0
-
-/*
- *  This type is used by the protocols that transmit 16 bits words in 
- *  little endian format. A little endian machine stores MSB of word in
- *  byte[1] and LSB in byte[0]. A big endian machine stores MSB in byte[0] 
- *  and LSB in byte[1].
- *
- * This structure is used in the code for things that are endian neutral
- * but that fit in a word so that we can manipulate them efficiently.
- * By endian neutral, I mean things that are really an array of bytes,
- * and always used as such, for example the hint bits. Jean II
- */
-typedef union {
-	__u16 word;
-	__u8  byte[2];
-} __u16_host_order;
-
-/* Types of discovery */
-typedef enum {
-	DISCOVERY_LOG,		/* What's in our discovery log */
-	DISCOVERY_ACTIVE,	/* Doing our own discovery on the medium */
-	DISCOVERY_PASSIVE,	/* Peer doing discovery on the medium */
-	EXPIRY_TIMEOUT,		/* Entry expired due to timeout */
-} DISCOVERY_MODE;
-
-#define NICKNAME_MAX_LEN 21
-
-/* Basic discovery information about a peer */
-typedef struct irda_device_info		discinfo_t;	/* linux/irda.h */
-
-/*
- * The DISCOVERY structure is used for both discovery requests and responses
- */
-typedef struct discovery_t {
-	irda_queue_t	q;		/* Must be first! */
-
-	discinfo_t	data;		/* Basic discovery information */
-	int		name_len;	/* Length of nickname */
-
-	LAP_REASON	condition;	/* More info about the discovery */
-	int		gen_addr_bit;	/* Need to generate a new device
-					 * address? */
-	int		nslots;		/* Number of slots to use when
-					 * discovering */
-	unsigned long	timestamp;	/* Last time discovered */
-	unsigned long	firststamp;	/* First time discovered */
-} discovery_t;
-
-void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *discovery);
-void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log);
-void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force);
-struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
-						__u16 mask, int old_entries);
-
-#endif
diff --git a/include/net/irda/ircomm_core.h b/include/net/irda/ircomm_core.h
deleted file mode 100644
index 2a580ce9edad..000000000000
--- a/include/net/irda/ircomm_core.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      ircomm_core.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Wed Jun  9 08:58:43 1999
- * Modified at:   Mon Dec 13 11:52:29 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRCOMM_CORE_H
-#define IRCOMM_CORE_H
-
-#include <net/irda/irda.h>
-#include <net/irda/irqueue.h>
-#include <net/irda/ircomm_event.h>
-
-#define IRCOMM_MAGIC 0x98347298
-#define IRCOMM_HEADER_SIZE 1
-
-struct ircomm_cb;   /* Forward decl. */
-
-/*
- * A small call-table, so we don't have to check the service-type whenever
- * we want to do something
- */
-typedef struct {
-	int (*data_request)(struct ircomm_cb *, struct sk_buff *, int clen);
-	int (*connect_request)(struct ircomm_cb *, struct sk_buff *, 
-			       struct ircomm_info *);
-	int (*connect_response)(struct ircomm_cb *, struct sk_buff *);
-	int (*disconnect_request)(struct ircomm_cb *, struct sk_buff *, 
-				  struct ircomm_info *);	
-} call_t;
-
-struct ircomm_cb {
-	irda_queue_t queue;
-	magic_t magic;
-
-	notify_t notify;
-	call_t   issue;
-
-	int state;
-	int line;            /* Which TTY line we are using */
-
-	struct tsap_cb *tsap;
-	struct lsap_cb *lsap;
-	
-	__u8 dlsap_sel;      /* Destination LSAP/TSAP selector */
-	__u8 slsap_sel;      /* Source LSAP/TSAP selector */
-
-	__u32 saddr;         /* Source device address (link we are using) */
-	__u32 daddr;         /* Destination device address */
-
-	int max_header_size; /* Header space we must reserve for each frame */
-	int max_data_size;   /* The amount of data we can fill in each frame */
-
-	LOCAL_FLOW flow_status; /* Used by ircomm_lmp */
-	int pkt_count;          /* Number of frames we have sent to IrLAP */
-
-	__u8 service_type;
-};
-
-extern hashbin_t *ircomm;
-
-struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line);
-int ircomm_close(struct ircomm_cb *self);
-
-int ircomm_data_request(struct ircomm_cb *self, struct sk_buff *skb);
-void ircomm_data_indication(struct ircomm_cb *self, struct sk_buff *skb);
-void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb);
-int ircomm_control_request(struct ircomm_cb *self, struct sk_buff *skb);
-int ircomm_connect_request(struct ircomm_cb *self, __u8 dlsap_sel, 
-			   __u32 saddr, __u32 daddr, struct sk_buff *skb,
-			   __u8 service_type);
-void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb,
-			       struct ircomm_info *info);
-void ircomm_connect_confirm(struct ircomm_cb *self, struct sk_buff *skb,
-			    struct ircomm_info *info);
-int ircomm_connect_response(struct ircomm_cb *self, struct sk_buff *userdata);
-int ircomm_disconnect_request(struct ircomm_cb *self, struct sk_buff *userdata);
-void ircomm_disconnect_indication(struct ircomm_cb *self, struct sk_buff *skb,
-				  struct ircomm_info *info);
-void ircomm_flow_request(struct ircomm_cb *self, LOCAL_FLOW flow);
-
-#define ircomm_is_connected(self) (self->state == IRCOMM_CONN)
-
-#endif
diff --git a/include/net/irda/ircomm_event.h b/include/net/irda/ircomm_event.h
deleted file mode 100644
index 5bbc32998d57..000000000000
--- a/include/net/irda/ircomm_event.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      ircomm_event.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Jun  6 23:51:13 1999
- * Modified at:   Thu Jun 10 08:36:25 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRCOMM_EVENT_H
-#define IRCOMM_EVENT_H
-
-#include <net/irda/irmod.h>
-
-typedef enum {
-        IRCOMM_IDLE,
-        IRCOMM_WAITI,
-        IRCOMM_WAITR,
-        IRCOMM_CONN,
-} IRCOMM_STATE;
-
-/* IrCOMM Events */
-typedef enum {
-        IRCOMM_CONNECT_REQUEST,
-        IRCOMM_CONNECT_RESPONSE,
-        IRCOMM_TTP_CONNECT_INDICATION,
-	IRCOMM_LMP_CONNECT_INDICATION,
-        IRCOMM_TTP_CONNECT_CONFIRM,
-	IRCOMM_LMP_CONNECT_CONFIRM,
-
-        IRCOMM_LMP_DISCONNECT_INDICATION,
-	IRCOMM_TTP_DISCONNECT_INDICATION,
-        IRCOMM_DISCONNECT_REQUEST,
-
-        IRCOMM_TTP_DATA_INDICATION,
-	IRCOMM_LMP_DATA_INDICATION,
-        IRCOMM_DATA_REQUEST,
-        IRCOMM_CONTROL_REQUEST,
-        IRCOMM_CONTROL_INDICATION,
-} IRCOMM_EVENT;
-
-/*
- * Used for passing information through the state-machine
- */
-struct ircomm_info {
-        __u32     saddr;               /* Source device address */
-        __u32     daddr;               /* Destination device address */
-        __u8      dlsap_sel;
-        LM_REASON reason;              /* Reason for disconnect */
-	__u32     max_data_size;
-	__u32     max_header_size;
-
-	struct qos_info *qos;
-};
-
-extern const char *const ircomm_state[];
-
-struct ircomm_cb;   /* Forward decl. */
-
-int ircomm_do_event(struct ircomm_cb *self, IRCOMM_EVENT event,
-		    struct sk_buff *skb, struct ircomm_info *info);
-void ircomm_next_state(struct ircomm_cb *self, IRCOMM_STATE state);
-
-#endif
diff --git a/include/net/irda/ircomm_lmp.h b/include/net/irda/ircomm_lmp.h
deleted file mode 100644
index 5042a5021a04..000000000000
--- a/include/net/irda/ircomm_lmp.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      ircomm_lmp.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Wed Jun  9 10:06:07 1999
- * Modified at:   Fri Aug 13 07:32:32 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRCOMM_LMP_H
-#define IRCOMM_LMP_H
-
-#include <net/irda/ircomm_core.h>
-
-int ircomm_open_lsap(struct ircomm_cb *self);
-
-#endif
diff --git a/include/net/irda/ircomm_param.h b/include/net/irda/ircomm_param.h
deleted file mode 100644
index 1f67432321c4..000000000000
--- a/include/net/irda/ircomm_param.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      ircomm_param.h
- * Version:       1.0
- * Description:   Parameter handling for the IrCOMM protocol
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Jun  7 08:47:28 1999
- * Modified at:   Wed Aug 25 13:46:33 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRCOMM_PARAMS_H
-#define IRCOMM_PARAMS_H
-
-#include <net/irda/parameters.h>
-
-/* Parameters common to all service types */
-#define IRCOMM_SERVICE_TYPE     0x00
-#define IRCOMM_PORT_TYPE        0x01 /* Only used in LM-IAS */
-#define IRCOMM_PORT_NAME        0x02 /* Only used in LM-IAS */
-
-/* Parameters for both 3 wire and 9 wire */
-#define IRCOMM_DATA_RATE        0x10
-#define IRCOMM_DATA_FORMAT      0x11
-#define IRCOMM_FLOW_CONTROL     0x12
-#define IRCOMM_XON_XOFF         0x13
-#define IRCOMM_ENQ_ACK          0x14
-#define IRCOMM_LINE_STATUS      0x15
-#define IRCOMM_BREAK            0x16
-
-/* Parameters for 9 wire */
-#define IRCOMM_DTE              0x20
-#define IRCOMM_DCE              0x21
-#define IRCOMM_POLL             0x22
-
-/* Service type (details) */
-#define IRCOMM_3_WIRE_RAW       0x01
-#define IRCOMM_3_WIRE           0x02
-#define IRCOMM_9_WIRE           0x04
-#define IRCOMM_CENTRONICS       0x08
-
-/* Port type (details) */
-#define IRCOMM_SERIAL           0x00
-#define IRCOMM_PARALLEL         0x01
-
-/* Data format (details) */
-#define IRCOMM_WSIZE_5          0x00
-#define IRCOMM_WSIZE_6          0x01
-#define IRCOMM_WSIZE_7          0x02
-#define IRCOMM_WSIZE_8          0x03
-
-#define IRCOMM_1_STOP_BIT       0x00
-#define IRCOMM_2_STOP_BIT       0x04 /* 1.5 if char len 5 */
-
-#define IRCOMM_PARITY_DISABLE   0x00
-#define IRCOMM_PARITY_ENABLE    0x08
-
-#define IRCOMM_PARITY_ODD       0x00
-#define IRCOMM_PARITY_EVEN      0x10
-#define IRCOMM_PARITY_MARK      0x20
-#define IRCOMM_PARITY_SPACE     0x30
-
-/* Flow control */
-#define IRCOMM_XON_XOFF_IN      0x01
-#define IRCOMM_XON_XOFF_OUT     0x02
-#define IRCOMM_RTS_CTS_IN       0x04
-#define IRCOMM_RTS_CTS_OUT      0x08
-#define IRCOMM_DSR_DTR_IN       0x10
-#define IRCOMM_DSR_DTR_OUT      0x20
-#define IRCOMM_ENQ_ACK_IN       0x40
-#define IRCOMM_ENQ_ACK_OUT      0x80
-
-/* Line status */
-#define IRCOMM_OVERRUN_ERROR    0x02
-#define IRCOMM_PARITY_ERROR     0x04
-#define IRCOMM_FRAMING_ERROR    0x08
-
-/* DTE (Data terminal equipment) line settings */
-#define IRCOMM_DELTA_DTR        0x01
-#define IRCOMM_DELTA_RTS        0x02
-#define IRCOMM_DTR              0x04
-#define IRCOMM_RTS              0x08
-
-/* DCE (Data communications equipment) line settings */
-#define IRCOMM_DELTA_CTS        0x01  /* Clear to send has changed */
-#define IRCOMM_DELTA_DSR        0x02  /* Data set ready has changed */
-#define IRCOMM_DELTA_RI         0x04  /* Ring indicator has changed */
-#define IRCOMM_DELTA_CD         0x08  /* Carrier detect has changed */
-#define IRCOMM_CTS              0x10  /* Clear to send is high */
-#define IRCOMM_DSR              0x20  /* Data set ready is high */
-#define IRCOMM_RI               0x40  /* Ring indicator is high */
-#define IRCOMM_CD               0x80  /* Carrier detect is high */
-#define IRCOMM_DCE_DELTA_ANY    0x0f
-
-/*
- * Parameter state
- */
-struct ircomm_params {
-	/* General control params */
-	__u8  service_type;
-	__u8  port_type;
-	char  port_name[32];
-
-	/* Control params for 3- and 9-wire service type */
-	__u32 data_rate;         /* Data rate in bps */
-	__u8  data_format;
-	__u8  flow_control;
-	char  xonxoff[2];
-	char  enqack[2];
-	__u8  line_status;
-	__u8  _break;
-
-	__u8  null_modem;
-
-	/* Control params for 9-wire service type */
-	__u8 dte;
-	__u8 dce;
-	__u8 poll;
-
-	/* Control params for Centronics service type */
-};
-
-struct ircomm_tty_cb; /* Forward decl. */
-
-int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush);
-
-extern pi_param_info_t ircomm_param_info;
-
-#endif /* IRCOMM_PARAMS_H */
-
diff --git a/include/net/irda/ircomm_ttp.h b/include/net/irda/ircomm_ttp.h
deleted file mode 100644
index c5627288bca3..000000000000
--- a/include/net/irda/ircomm_ttp.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      ircomm_ttp.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Wed Jun  9 10:06:07 1999
- * Modified at:   Fri Aug 13 07:32:22 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRCOMM_TTP_H
-#define IRCOMM_TTP_H
-
-#include <net/irda/ircomm_core.h>
-
-int  ircomm_open_tsap(struct ircomm_cb *self);
-
-#endif
-
diff --git a/include/net/irda/ircomm_tty.h b/include/net/irda/ircomm_tty.h
deleted file mode 100644
index 8d4f588974bc..000000000000
--- a/include/net/irda/ircomm_tty.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      ircomm_tty.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Jun  6 23:24:22 1999
- * Modified at:   Fri Jan 28 13:16:57 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRCOMM_TTY_H
-#define IRCOMM_TTY_H
-
-#include <linux/serial.h>
-#include <linux/termios.h>
-#include <linux/timer.h>
-#include <linux/tty.h>		/* struct tty_struct */
-
-#include <net/irda/irias_object.h>
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_param.h>
-
-#define IRCOMM_TTY_PORTS 32
-#define IRCOMM_TTY_MAGIC 0x3432
-#define IRCOMM_TTY_MAJOR 161
-#define IRCOMM_TTY_MINOR 0
-
-/* This is used as an initial value to max_header_size before the proper
- * value is filled in (5 for ttp, 4 for lmp). This allow us to detect
- * the state of the underlying connection. - Jean II */
-#define IRCOMM_TTY_HDR_UNINITIALISED	16
-/* Same for payload size. See qos.c for the smallest max data size */
-#define IRCOMM_TTY_DATA_UNINITIALISED	(64 - IRCOMM_TTY_HDR_UNINITIALISED)
-
-/*
- * IrCOMM TTY driver state
- */
-struct ircomm_tty_cb {
-	irda_queue_t queue;            /* Must be first */
-	struct tty_port port;
-	magic_t magic;
-
-	int state;                /* Connect state */
-
-	struct ircomm_cb *ircomm; /* IrCOMM layer instance */
-
-	struct sk_buff *tx_skb;   /* Transmit buffer */
-	struct sk_buff *ctrl_skb; /* Control data buffer */
-
-	/* Parameters */
-	struct ircomm_params settings;
-
-	__u8 service_type;        /* The service that we support */
-	int client;               /* True if we are a client */
-	LOCAL_FLOW flow;          /* IrTTP flow status */
-
-	int line;
-
-	__u8 dlsap_sel;
-	__u8 slsap_sel;
-
-	__u32 saddr;
-	__u32 daddr;
-
-	__u32 max_data_size;   /* Max data we can transmit in one packet */
-	__u32 max_header_size; /* The amount of header space we must reserve */
-	__u32 tx_data_size;	/* Max data size of current tx_skb */
-
-	struct iriap_cb *iriap; /* Instance used for querying remote IAS */
-	struct ias_object* obj;
-	void *skey;
-	void *ckey;
-
-	struct timer_list watchdog_timer;
-	struct work_struct  tqueue;
-
-	/* Protect concurent access to :
-	 *	o self->ctrl_skb
-	 *	o self->tx_skb
-	 * Maybe other things may gain to be protected as well...
-	 * Jean II */
-	spinlock_t spinlock;
-};
-
-void ircomm_tty_start(struct tty_struct *tty);
-void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self);
-
-int ircomm_tty_tiocmget(struct tty_struct *tty);
-int ircomm_tty_tiocmset(struct tty_struct *tty, unsigned int set,
-			unsigned int clear);
-int ircomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd,
-		     unsigned long arg);
-void ircomm_tty_set_termios(struct tty_struct *tty,
-			    struct ktermios *old_termios);
-
-#endif
-
-
-
-
-
-
-
diff --git a/include/net/irda/ircomm_tty_attach.h b/include/net/irda/ircomm_tty_attach.h
deleted file mode 100644
index 20dcbdf258cf..000000000000
--- a/include/net/irda/ircomm_tty_attach.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      ircomm_tty_attach.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Wed Jun  9 15:55:18 1999
- * Modified at:   Fri Dec 10 21:04:55 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRCOMM_TTY_ATTACH_H
-#define IRCOMM_TTY_ATTACH_H
-
-#include <net/irda/ircomm_tty.h>
-
-typedef enum {
-        IRCOMM_TTY_IDLE,
-	IRCOMM_TTY_SEARCH,
-        IRCOMM_TTY_QUERY_PARAMETERS,
-	IRCOMM_TTY_QUERY_LSAP_SEL,
-	IRCOMM_TTY_SETUP,
-        IRCOMM_TTY_READY,
-} IRCOMM_TTY_STATE;
-
-/* IrCOMM TTY Events */
-typedef enum {
-	IRCOMM_TTY_ATTACH_CABLE,
-	IRCOMM_TTY_DETACH_CABLE,
-	IRCOMM_TTY_DATA_REQUEST,
-	IRCOMM_TTY_DATA_INDICATION,
-	IRCOMM_TTY_DISCOVERY_REQUEST,
-	IRCOMM_TTY_DISCOVERY_INDICATION,
-	IRCOMM_TTY_CONNECT_CONFIRM,
-	IRCOMM_TTY_CONNECT_INDICATION,
-	IRCOMM_TTY_DISCONNECT_REQUEST,
-	IRCOMM_TTY_DISCONNECT_INDICATION,
-	IRCOMM_TTY_WD_TIMER_EXPIRED,
-	IRCOMM_TTY_GOT_PARAMETERS,
-	IRCOMM_TTY_GOT_LSAPSEL,
-} IRCOMM_TTY_EVENT;
-
-/* Used for passing information through the state-machine */
-struct ircomm_tty_info {
-        __u32     saddr;               /* Source device address */
-        __u32     daddr;               /* Destination device address */
-        __u8      dlsap_sel;
-};
-
-extern const char *const ircomm_state[];
-extern const char *const ircomm_tty_state[];
-
-int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event,
-			struct sk_buff *skb, struct ircomm_tty_info *info);
-
-
-int  ircomm_tty_attach_cable(struct ircomm_tty_cb *self);
-void ircomm_tty_detach_cable(struct ircomm_tty_cb *self);
-void ircomm_tty_connect_confirm(void *instance, void *sap, 
-				struct qos_info *qos, 
-				__u32 max_sdu_size, 
-				__u8 max_header_size, 
-				struct sk_buff *skb);
-void ircomm_tty_disconnect_indication(void *instance, void *sap, 
-				      LM_REASON reason,
-				      struct sk_buff *skb);
-void ircomm_tty_connect_indication(void *instance, void *sap, 
-				   struct qos_info *qos, 
-				   __u32 max_sdu_size,
-				   __u8 max_header_size, 
-				   struct sk_buff *skb);
-int ircomm_tty_send_initial_parameters(struct ircomm_tty_cb *self);
-void ircomm_tty_link_established(struct ircomm_tty_cb *self);
-
-#endif /* IRCOMM_TTY_ATTACH_H */
diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h
deleted file mode 100644
index 92c8fb575213..000000000000
--- a/include/net/irda/irda.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irda.h
- * Version:       1.0
- * Description:   IrDA common include file for kernel internal use
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Dec  9 21:13:12 1997
- * Modified at:   Fri Jan 28 13:16:32 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *      
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *  
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *     
- ********************************************************************/
-
-#ifndef NET_IRDA_H
-#define NET_IRDA_H
-
-#include <linux/skbuff.h>		/* struct sk_buff */
-#include <linux/kernel.h>
-#include <linux/if.h>			/* sa_family_t in <linux/irda.h> */
-#include <linux/irda.h>
-
-typedef __u32 magic_t;
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-#ifndef FALSE 
-#define FALSE 0
-#endif
-
-/* Hack to do small backoff when setting media busy in IrLAP */
-#ifndef SMALL
-#define SMALL 5
-#endif
-
-#ifndef IRDA_MIN /* Lets not mix this MIN with other header files */
-#define IRDA_MIN(a, b) (((a) < (b)) ? (a) : (b))
-#endif
-
-#ifndef IRDA_ALIGN
-#  define IRDA_ALIGN __attribute__((aligned))
-#endif
-
-#ifdef CONFIG_IRDA_DEBUG
-#define IRDA_ASSERT(expr, func) \
-do { if(!(expr)) { \
-	printk( "Assertion failed! %s:%s:%d %s\n", \
-		__FILE__,__func__,__LINE__,(#expr) ); \
-	func } } while (0)
-#define IRDA_ASSERT_LABEL(label)	label
-#else
-#define IRDA_ASSERT(expr, func) do { (void)(expr); } while (0)
-#define IRDA_ASSERT_LABEL(label)
-#endif /* CONFIG_IRDA_DEBUG */
-
-/*
- *  Magic numbers used by Linux-IrDA. Random numbers which must be unique to 
- *  give the best protection
- */
-
-#define IRTTY_MAGIC        0x2357
-#define LAP_MAGIC          0x1357
-#define LMP_MAGIC          0x4321
-#define LMP_LSAP_MAGIC     0x69333
-#define LMP_LAP_MAGIC      0x3432
-#define IRDA_DEVICE_MAGIC  0x63454
-#define IAS_MAGIC          0x007
-#define TTP_MAGIC          0x241169
-#define TTP_TSAP_MAGIC     0x4345
-#define IROBEX_MAGIC       0x341324
-#define HB_MAGIC           0x64534
-#define IRLAN_MAGIC        0x754
-#define IAS_OBJECT_MAGIC   0x34234
-#define IAS_ATTRIB_MAGIC   0x45232
-#define IRDA_TASK_MAGIC    0x38423
-
-#define IAS_DEVICE_ID 0x0000 /* Defined by IrDA, IrLMP section 4.1 (page 68) */
-#define IAS_PNP_ID    0xd342
-#define IAS_OBEX_ID   0x34323
-#define IAS_IRLAN_ID  0x34234
-#define IAS_IRCOMM_ID 0x2343
-#define IAS_IRLPT_ID  0x9876
-
-struct net_device;
-struct packet_type;
-
-void irda_proc_register(void);
-void irda_proc_unregister(void);
-
-int irda_sysctl_register(void);
-void irda_sysctl_unregister(void);
-
-int irsock_init(void);
-void irsock_cleanup(void);
-
-int irda_nl_register(void);
-void irda_nl_unregister(void);
-
-int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
-		     struct packet_type *ptype, struct net_device *orig_dev);
-
-#endif /* NET_IRDA_H */
diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h
deleted file mode 100644
index 664bf8178412..000000000000
--- a/include/net/irda/irda_device.h
+++ /dev/null
@@ -1,285 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irda_device.h
- * Version:       0.9
- * Description:   Contains various declarations used by the drivers
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Apr 14 12:41:42 1998
- * Modified at:   Mon Mar 20 09:08:57 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 1998 Thomas Davis, <ratbert@radiks.net>,
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-/*
- * This header contains all the IrDA definitions a driver really
- * needs, and therefore the driver should not need to include
- * any other IrDA headers - Jean II
- */
-
-#ifndef IRDA_DEVICE_H
-#define IRDA_DEVICE_H
-
-#include <linux/tty.h>
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>		/* struct sk_buff */
-#include <linux/irda.h>
-#include <linux/types.h>
-
-#include <net/pkt_sched.h>
-#include <net/irda/irda.h>
-#include <net/irda/qos.h>		/* struct qos_info */
-#include <net/irda/irqueue.h>		/* irda_queue_t */
-
-/* A few forward declarations (to make compiler happy) */
-struct irlap_cb;
-
-/* Some non-standard interface flags (should not conflict with any in if.h) */
-#define IFF_SIR 	0x0001 /* Supports SIR speeds */
-#define IFF_MIR 	0x0002 /* Supports MIR speeds */
-#define IFF_FIR 	0x0004 /* Supports FIR speeds */
-#define IFF_VFIR        0x0008 /* Supports VFIR speeds */
-#define IFF_PIO   	0x0010 /* Supports PIO transfer of data */
-#define IFF_DMA		0x0020 /* Supports DMA transfer of data */
-#define IFF_SHM         0x0040 /* Supports shared memory data transfers */
-#define IFF_DONGLE      0x0080 /* Interface has a dongle attached */
-#define IFF_AIR         0x0100 /* Supports Advanced IR (AIR) standards */
-
-#define IO_XMIT 0x01
-#define IO_RECV 0x02
-
-typedef enum {
-	IRDA_IRLAP, /* IrDA mode, and deliver to IrLAP */
-	IRDA_RAW,   /* IrDA mode */
-	SHARP_ASK,
-	TV_REMOTE,  /* Also known as Consumer Electronics IR */
-} INFRARED_MODE;
-
-typedef enum {
-	IRDA_TASK_INIT,        /* All tasks are initialized with this state */
-	IRDA_TASK_DONE,        /* Signals that the task is finished */
-	IRDA_TASK_WAIT,
-	IRDA_TASK_WAIT1,
-	IRDA_TASK_WAIT2,
-	IRDA_TASK_WAIT3,
-	IRDA_TASK_CHILD_INIT,  /* Initializing child task */
-	IRDA_TASK_CHILD_WAIT,  /* Waiting for child task to finish */
-	IRDA_TASK_CHILD_DONE   /* Child task is finished */
-} IRDA_TASK_STATE;
-
-struct irda_task;
-typedef int (*IRDA_TASK_CALLBACK) (struct irda_task *task);
-
-struct irda_task {
-	irda_queue_t q;
-	magic_t magic;
-
-	IRDA_TASK_STATE state;
-	IRDA_TASK_CALLBACK function;
-	IRDA_TASK_CALLBACK finished;
-
-	struct irda_task *parent;
-	struct timer_list timer;
-
-	void *instance; /* Instance being called */
-	void *param;    /* Parameter to be used by instance */
-};
-
-/* Dongle info */
-struct dongle_reg;
-typedef struct {
-	struct dongle_reg *issue;     /* Registration info */
-	struct net_device *dev;           /* Device we are attached to */
-	struct irda_task *speed_task; /* Task handling speed change */
-	struct irda_task *reset_task; /* Task handling reset */
-	__u32 speed;                  /* Current speed */
-
-	/* Callbacks to the IrDA device driver */
-	int (*set_mode)(struct net_device *, int mode);
-	int (*read)(struct net_device *dev, __u8 *buf, int len);
-	int (*write)(struct net_device *dev, __u8 *buf, int len);
-	int (*set_dtr_rts)(struct net_device *dev, int dtr, int rts);
-} dongle_t;
-
-/* Dongle registration info */
-struct dongle_reg {
-	irda_queue_t q;         /* Must be first */
-	IRDA_DONGLE type;
-
-	void (*open)(dongle_t *dongle, struct qos_info *qos);
-	void (*close)(dongle_t *dongle);
-	int  (*reset)(struct irda_task *task);
-	int  (*change_speed)(struct irda_task *task);
-	struct module *owner;
-};
-
-/* 
- * Per-packet information we need to hide inside sk_buff 
- * (must not exceed 48 bytes, check with struct sk_buff)
- * The default_qdisc_pad field is a temporary hack.
- */
-struct irda_skb_cb {
-	unsigned int default_qdisc_pad;
-	magic_t magic;       /* Be sure that we can trust the information */
-	__u32   next_speed;  /* The Speed to be set *after* this frame */
-	__u16   mtt;         /* Minimum turn around time */
-	__u16   xbofs;       /* Number of xbofs required, used by SIR mode */
-	__u16   next_xbofs;  /* Number of xbofs required *after* this frame */
-	void    *context;    /* May be used by drivers */
-	void    (*destructor)(struct sk_buff *skb); /* Used for flow control */
-	__u16   xbofs_delay; /* Number of xbofs used for generating the mtt */
-	__u8    line;        /* Used by IrCOMM in IrLPT mode */
-};
-
-/* Chip specific info */
-typedef struct {
-	int cfg_base;         /* Config register IO base */
-        int sir_base;         /* SIR IO base */
-	int fir_base;         /* FIR IO base */
-	int mem_base;         /* Shared memory base */
-        int sir_ext;          /* Length of SIR iobase */
-	int fir_ext;          /* Length of FIR iobase */
-        int irq, irq2;        /* Interrupts used */
-        int dma, dma2;        /* DMA channel(s) used */
-        int fifo_size;        /* FIFO size */
-        int irqflags;         /* interrupt flags (ie, IRQF_SHARED) */
-	int direction;        /* Link direction, used by some FIR drivers */
-	int enabled;          /* Powered on? */
-	int suspended;        /* Suspended by APM */
-	__u32 speed;          /* Currently used speed */
-	__u32 new_speed;      /* Speed we must change to when Tx is finished */
-	int dongle_id;        /* Dongle or transceiver currently used */
-} chipio_t;
-
-/* IO buffer specific info (inspired by struct sk_buff) */
-typedef struct {
-	int state;            /* Receiving state (transmit state not used) */
-	int in_frame;         /* True if receiving frame */
-
-	__u8 *head;	      /* start of buffer */
-	__u8 *data;	      /* start of data in buffer */
-
-	int len;	      /* current length of data */
-	int truesize;	      /* total allocated size of buffer */
-	__u16 fcs;
-
-	struct sk_buff *skb;	/* ZeroCopy Rx in async_unwrap_char() */
-} iobuff_t;
-
-/* Maximum SIR frame (skb) that we expect to receive *unwrapped*.
- * Max LAP MTU (I field) is 2048 bytes max (IrLAP 1.1, chapt 6.6.5, p40).
- * Max LAP header is 2 bytes (for now).
- * Max CRC is 2 bytes at SIR, 4 bytes at FIR. 
- * Need 1 byte for skb_reserve() to align IP header for IrLAN.
- * Add a few extra bytes just to be safe (buffer is power of two anyway)
- * Jean II */
-#define IRDA_SKB_MAX_MTU	2064
-/* Maximum SIR frame that we expect to send, wrapped (i.e. with XBOFS
- * and escaped characters on top of above). */
-#define IRDA_SIR_MAX_FRAME	4269
-
-/* The SIR unwrapper async_unwrap_char() will use a Rx-copy-break mechanism
- * when using the optional ZeroCopy Rx, where only small frames are memcpy
- * to a smaller skb to save memory. This is the threshold under which copy
- * will happen (and over which it won't happen).
- * Some FIR drivers may use this #define as well...
- * This is the same value as various Ethernet drivers. - Jean II */
-#define IRDA_RX_COPY_THRESHOLD  256
-
-/* Function prototypes */
-int  irda_device_init(void);
-void irda_device_cleanup(void);
-
-/* IrLAP entry points used by the drivers.
- * We declare them here to avoid the driver pulling a whole bunch stack
- * headers they don't really need - Jean II */
-struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
-			    const char *hw_name);
-void irlap_close(struct irlap_cb *self);
-
-/* Interface to be uses by IrLAP */
-void irda_device_set_media_busy(struct net_device *dev, int status);
-int  irda_device_is_media_busy(struct net_device *dev);
-int  irda_device_is_receiving(struct net_device *dev);
-
-/* Interface for internal use */
-static inline int irda_device_txqueue_empty(const struct net_device *dev)
-{
-	return qdisc_all_tx_empty(dev);
-}
-int  irda_device_set_raw_mode(struct net_device* self, int status);
-struct net_device *alloc_irdadev(int sizeof_priv);
-
-void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode);
-
-/*
- * Function irda_get_mtt (skb)
- *
- *    Utility function for getting the minimum turnaround time out of 
- *    the skb, where it has been hidden in the cb field.
- */
-static inline __u16 irda_get_mtt(const struct sk_buff *skb)
-{
-	const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb;
-	return (cb->magic == LAP_MAGIC) ? cb->mtt : 10000;
-}
-
-/*
- * Function irda_get_next_speed (skb)
- *
- *    Extract the speed that should be set *after* this frame from the skb
- *
- * Note : return -1 for user space frames
- */
-static inline __u32 irda_get_next_speed(const struct sk_buff *skb)
-{
-	const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb;
-	return (cb->magic == LAP_MAGIC) ? cb->next_speed : -1;
-}
-
-/*
- * Function irda_get_next_xbofs (skb)
- *
- *    Extract the xbofs that should be set for this frame from the skb
- *
- * Note : default to 10 for user space frames
- */
-static inline __u16 irda_get_xbofs(const struct sk_buff *skb)
-{
-	const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb;
-	return (cb->magic == LAP_MAGIC) ? cb->xbofs : 10;
-}
-
-/*
- * Function irda_get_next_xbofs (skb)
- *
- *    Extract the xbofs that should be set *after* this frame from the skb
- *
- * Note : return -1 for user space frames
- */
-static inline __u16 irda_get_next_xbofs(const struct sk_buff *skb)
-{
-	const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb;
-	return (cb->magic == LAP_MAGIC) ? cb->next_xbofs : -1;
-}
-#endif /* IRDA_DEVICE_H */
-
-
diff --git a/include/net/irda/iriap.h b/include/net/irda/iriap.h
deleted file mode 100644
index fcc896491a95..000000000000
--- a/include/net/irda/iriap.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      iriap.h
- * Version:       0.5
- * Description:   Information Access Protocol (IAP)
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Thu Aug 21 00:02:07 1997
- * Modified at:   Sat Dec 25 16:42:09 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1997-1999 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRIAP_H
-#define IRIAP_H
-
-#include <linux/types.h>
-#include <linux/skbuff.h>
-
-#include <net/irda/iriap_event.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/irqueue.h>		/* irda_queue_t */
-#include <net/irda/timer.h>		/* struct timer_list */
-
-#define IAP_LST 0x80
-#define IAP_ACK 0x40
-
-#define IAS_SERVER 0
-#define IAS_CLIENT 1
-
-/* IrIAP Op-codes */
-#define GET_INFO_BASE      0x01
-#define GET_OBJECTS        0x02
-#define GET_VALUE          0x03
-#define GET_VALUE_BY_CLASS 0x04
-#define GET_OBJECT_INFO    0x05
-#define GET_ATTRIB_NAMES   0x06
-
-#define IAS_SUCCESS        0
-#define IAS_CLASS_UNKNOWN  1
-#define IAS_ATTRIB_UNKNOWN 2
-#define IAS_DISCONNECT     10
-
-typedef void (*CONFIRM_CALLBACK)(int result, __u16 obj_id, 
-				 struct ias_value *value, void *priv);
-
-struct iriap_cb {
-	irda_queue_t q; /* Must be first */	
-	magic_t magic;  /* Magic cookie */
-
-	int          mode;   /* Client or server */
-
-	__u32        saddr;
-	__u32        daddr;
-	__u8         operation;
-
-	struct sk_buff *request_skb;
-	struct lsap_cb *lsap;
-	__u8 slsap_sel;
-
-	/* Client states */
-	IRIAP_STATE client_state;
-	IRIAP_STATE call_state;
-	
-	/* Server states */
-	IRIAP_STATE server_state;
-	IRIAP_STATE r_connect_state;
-	
-	CONFIRM_CALLBACK confirm;
-	void *priv;                /* Used to identify client */
-
-	__u8 max_header_size;
-	__u32 max_data_size;
-	
-	struct timer_list watchdog_timer;
-};
-
-int  iriap_init(void);
-void iriap_cleanup(void);
-
-struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, 
-			    CONFIRM_CALLBACK callback);
-void iriap_close(struct iriap_cb *self);
-
-int iriap_getvaluebyclass_request(struct iriap_cb *self, 
-				  __u32 saddr, __u32 daddr,
-				  char *name, char *attr);
-void iriap_connect_request(struct iriap_cb *self);
-void iriap_send_ack( struct iriap_cb *self);
-void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb);
-
-void iriap_register_server(void);
-
-#endif
-
-
diff --git a/include/net/irda/iriap_event.h b/include/net/irda/iriap_event.h
deleted file mode 100644
index 89747f06d9eb..000000000000
--- a/include/net/irda/iriap_event.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      iriap_event.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Aug  4 20:40:53 1997
- * Modified at:   Sun Oct 31 22:02:54 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRIAP_FSM_H
-#define IRIAP_FSM_H
-
-/* Forward because of circular include dependecies */
-struct iriap_cb;
-
-/* IrIAP states */
-typedef enum {
-	/* Client */
-	S_DISCONNECT,
-	S_CONNECTING,
-	S_CALL,
-
-	/* S-Call */
-	S_MAKE_CALL,
-	S_CALLING,
-	S_OUTSTANDING,
-	S_REPLYING,
-	S_WAIT_FOR_CALL,
-	S_WAIT_ACTIVE,
-
-	/* Server */
-	R_DISCONNECT,
-	R_CALL,
-	
-	/* R-Connect */
-	R_WAITING,
-	R_WAIT_ACTIVE,
-	R_RECEIVING,
-	R_EXECUTE,
-	R_RETURNING,
-} IRIAP_STATE;
-
-typedef enum {
-	IAP_CALL_REQUEST,
-	IAP_CALL_REQUEST_GVBC,
-	IAP_CALL_RESPONSE,
-	IAP_RECV_F_LST,
-	IAP_LM_DISCONNECT_INDICATION,
-	IAP_LM_CONNECT_INDICATION,
-	IAP_LM_CONNECT_CONFIRM,
-} IRIAP_EVENT;
-
-void iriap_next_client_state   (struct iriap_cb *self, IRIAP_STATE state);
-void iriap_next_call_state     (struct iriap_cb *self, IRIAP_STATE state);
-void iriap_next_server_state   (struct iriap_cb *self, IRIAP_STATE state);
-void iriap_next_r_connect_state(struct iriap_cb *self, IRIAP_STATE state);
-
-
-void iriap_do_client_event(struct iriap_cb *self, IRIAP_EVENT event, 
-			   struct sk_buff *skb);
-void iriap_do_call_event  (struct iriap_cb *self, IRIAP_EVENT event, 
-			   struct sk_buff *skb);
-
-void iriap_do_server_event   (struct iriap_cb *self, IRIAP_EVENT event, 
-			      struct sk_buff *skb);
-void iriap_do_r_connect_event(struct iriap_cb *self, IRIAP_EVENT event, 
-			      struct sk_buff *skb);
-
-#endif /* IRIAP_FSM_H */
-
diff --git a/include/net/irda/irias_object.h b/include/net/irda/irias_object.h
deleted file mode 100644
index 83f78081799c..000000000000
--- a/include/net/irda/irias_object.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irias_object.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Thu Oct  1 22:49:50 1998
- * Modified at:   Wed Dec 15 11:20:57 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- *      
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *  
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *     
- ********************************************************************/
-
-#ifndef LM_IAS_OBJECT_H
-#define LM_IAS_OBJECT_H
-
-#include <net/irda/irda.h>
-#include <net/irda/irqueue.h>
-
-/* LM-IAS Attribute types */
-#define IAS_MISSING 0
-#define IAS_INTEGER 1
-#define IAS_OCT_SEQ 2
-#define IAS_STRING  3
-
-/* Object ownership of attributes (user or kernel) */
-#define IAS_KERNEL_ATTR	0
-#define IAS_USER_ATTR	1
-
-/*
- *  LM-IAS Object
- */
-struct ias_object {
-	irda_queue_t q;     /* Must be first! */
-	magic_t magic;
-	
-	char  *name;
-	int   id;
-	hashbin_t *attribs;
-};
-
-/*
- *  Values used by LM-IAS attributes
- */
-struct ias_value {
-        __u8    type;    /* Value description */
-	__u8	owner;	/* Managed from user/kernel space */
-	int     charset; /* Only used by string type */
-        int     len;
-	
-	/* Value */
-	union {
-		int integer;
-		char *string;
-		__u8 *oct_seq;
-	} t;
-};
-
-/*
- *  Attributes used by LM-IAS objects
- */
-struct ias_attrib {
-	irda_queue_t q; /* Must be first! */
-	int magic;
-
-        char *name;   	         /* Attribute name */
-	struct ias_value *value; /* Attribute value */
-};
-
-struct ias_object *irias_new_object(char *name, int id);
-void irias_insert_object(struct ias_object *obj);
-int  irias_delete_object(struct ias_object *obj);
-int  irias_delete_attrib(struct ias_object *obj, struct ias_attrib *attrib,
-			 int cleanobject);
-void __irias_delete_object(struct ias_object *obj);
-
-void irias_add_integer_attrib(struct ias_object *obj, char *name, int value,
-			      int user);
-void irias_add_string_attrib(struct ias_object *obj, char *name, char *value,
-			     int user);
-void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets,
-			     int len, int user);
-int irias_object_change_attribute(char *obj_name, char *attrib_name, 
-				  struct ias_value *new_value);
-struct ias_object *irias_find_object(char *name);
-struct ias_attrib *irias_find_attrib(struct ias_object *obj, char *name);
-
-struct ias_value *irias_new_string_value(char *string);
-struct ias_value *irias_new_integer_value(int integer);
-struct ias_value *irias_new_octseq_value(__u8 *octseq , int len);
-struct ias_value *irias_new_missing_value(void);
-void irias_delete_value(struct ias_value *value);
-
-extern struct ias_value irias_missing;
-extern hashbin_t *irias_objects;
-
-#endif
diff --git a/include/net/irda/irlan_client.h b/include/net/irda/irlan_client.h
deleted file mode 100644
index fa8455eda280..000000000000
--- a/include/net/irda/irlan_client.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlan_client.h
- * Version:       0.3
- * Description:   IrDA LAN access layer
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:37 1997
- * Modified at:   Thu Apr 22 14:13:34 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRLAN_CLIENT_H
-#define IRLAN_CLIENT_H
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-
-#include <net/irda/irias_object.h>
-#include <net/irda/irlan_event.h>
-
-void irlan_client_discovery_indication(discinfo_t *, DISCOVERY_MODE, void *);
-void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr);
-
-void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb);
-void irlan_client_get_value_confirm(int result, __u16 obj_id, 
-				    struct ias_value *value, void *priv);
-#endif
diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h
deleted file mode 100644
index 550c2d6ec7ff..000000000000
--- a/include/net/irda/irlan_common.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlan_common.h
- * Version:       0.8
- * Description:   IrDA LAN access layer
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:37 1997
- * Modified at:   Sun Oct 31 19:41:24 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRLAN_H
-#define IRLAN_H
-
-#include <asm/param.h>  /* for HZ */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/if_ether.h>
-
-#include <net/irda/irttp.h>
-
-#define IRLAN_MTU        1518
-#define IRLAN_TIMEOUT    10*HZ /* 10 seconds */
-
-/* Command packet types */
-#define CMD_GET_PROVIDER_INFO   0
-#define CMD_GET_MEDIA_CHAR      1
-#define CMD_OPEN_DATA_CHANNEL   2
-#define CMD_CLOSE_DATA_CHAN     3
-#define CMD_RECONNECT_DATA_CHAN 4
-#define CMD_FILTER_OPERATION    5
-
-/* Some responses */
-#define RSP_SUCCESS                 0
-#define RSP_INSUFFICIENT_RESOURCES  1
-#define RSP_INVALID_COMMAND_FORMAT  2
-#define RSP_COMMAND_NOT_SUPPORTED   3
-#define RSP_PARAM_NOT_SUPPORTED     4
-#define RSP_VALUE_NOT_SUPPORTED     5
-#define RSP_NOT_OPEN                6
-#define RSP_AUTHENTICATION_REQUIRED 7
-#define RSP_INVALID_PASSWORD        8
-#define RSP_PROTOCOL_ERROR          9
-#define RSP_ASYNCHRONOUS_ERROR    255
-
-/* Media types */
-#define MEDIA_802_3 1
-#define MEDIA_802_5 2
-
-/* Filter parameters */
-#define DATA_CHAN   1
-#define FILTER_TYPE 2
-#define FILTER_MODE 3
-
-/* Filter types */
-#define IRLAN_DIRECTED   0x01
-#define IRLAN_FUNCTIONAL 0x02
-#define IRLAN_GROUP      0x04
-#define IRLAN_MAC_FRAME  0x08
-#define IRLAN_MULTICAST  0x10
-#define IRLAN_BROADCAST  0x20
-#define IRLAN_IPX_SOCKET 0x40
-
-/* Filter modes */
-#define ALL     1
-#define FILTER  2
-#define NONE    3
-
-/* Filter operations */
-#define GET     1
-#define CLEAR   2
-#define ADD     3
-#define REMOVE  4
-#define DYNAMIC 5
-
-/* Access types */
-#define ACCESS_DIRECT  1
-#define ACCESS_PEER    2
-#define ACCESS_HOSTED  3
-
-#define IRLAN_BYTE   0
-#define IRLAN_SHORT  1
-#define IRLAN_ARRAY  2
-
-/* IrLAN sits on top if IrTTP */
-#define IRLAN_MAX_HEADER (TTP_HEADER+LMP_HEADER)
-/* 1 byte for the command code and 1 byte for the parameter count */
-#define IRLAN_CMD_HEADER 2
-
-#define IRLAN_STRING_PARAMETER_LEN(name, value) (1 + strlen((name)) + 2 \
-						+ strlen ((value)))
-#define IRLAN_BYTE_PARAMETER_LEN(name)          (1 + strlen((name)) + 2 + 1)
-#define IRLAN_SHORT_PARAMETER_LEN(name)         (1 + strlen((name)) + 2 + 2)
-
-/*
- *  IrLAN client
- */
-struct irlan_client_cb {
-	int state;
-
-	int open_retries;
-
-	struct tsap_cb *tsap_ctrl;
-	__u32 max_sdu_size;
-	__u8  max_header_size;
-	
-	int access_type;         /* Access type of provider */
-	__u8 reconnect_key[255];
-	__u8 key_len;
-	
-	__u16 recv_arb_val;
-	__u16 max_frame;
-	int filter_type;
-
-	int unicast_open;
-	int broadcast_open;
-
-	int tx_busy;
-	struct sk_buff_head txq; /* Transmit control queue */
-
-	struct iriap_cb *iriap;
-
-	struct timer_list kick_timer;
-};
-
-/*
- * IrLAN provider
- */
-struct irlan_provider_cb {
-	int state;
-	
-	struct tsap_cb *tsap_ctrl;
-	__u32 max_sdu_size;
-	__u8  max_header_size;
-
-	/*
-	 *  Store some values here which are used by the provider to parse
-	 *  the filter operations
-	 */
-	int data_chan;
-	int filter_type;
-	int filter_mode;
-	int filter_operation;
-	int filter_entry;
-	int access_type;     /* Access type */
-	__u16 send_arb_val;
-
-	__u8 mac_address[ETH_ALEN]; /* Generated MAC address for peer device */
-};
-
-/*
- *  IrLAN control block
- */
-struct irlan_cb {
-	int    magic;
-	struct list_head  dev_list;
-	struct net_device *dev;        /* Ethernet device structure*/
-
-	__u32 saddr;               /* Source device address */
-	__u32 daddr;               /* Destination device address */
-	int disconnect_reason;     /* Why we got disconnected */
-	
-	int media;                 /* Media type */
-	__u8 version[2];           /* IrLAN version */
-	
-	struct tsap_cb *tsap_data; /* Data TSAP */
-
-	int  use_udata;            /* Use Unit Data transfers */
-
-	__u8 stsap_sel_data;       /* Source data TSAP selector */
-	__u8 dtsap_sel_data;       /* Destination data TSAP selector */
-	__u8 dtsap_sel_ctrl;       /* Destination ctrl TSAP selector */
-
-	struct irlan_client_cb   client;   /* Client specific fields */
-	struct irlan_provider_cb provider; /* Provider specific fields */
-
-	__u32 max_sdu_size;
-	__u8  max_header_size;
-	
-	wait_queue_head_t open_wait;
-	struct timer_list watchdog_timer;
-};
-
-void irlan_close(struct irlan_cb *self);
-void irlan_close_tsaps(struct irlan_cb *self);
-
-int  irlan_register_netdev(struct irlan_cb *self);
-void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel);
-void irlan_start_watchdog_timer(struct irlan_cb *self, int timeout);
-
-void irlan_open_data_tsap(struct irlan_cb *self);
-
-int irlan_run_ctrl_tx_queue(struct irlan_cb *self);
-
-struct irlan_cb *irlan_get_any(void);
-void irlan_get_provider_info(struct irlan_cb *self);
-void irlan_get_media_char(struct irlan_cb *self);
-void irlan_open_data_channel(struct irlan_cb *self);
-void irlan_close_data_channel(struct irlan_cb *self);
-void irlan_set_multicast_filter(struct irlan_cb *self, int status);
-void irlan_set_broadcast_filter(struct irlan_cb *self, int status);
-
-int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value);
-int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value);
-int irlan_insert_string_param(struct sk_buff *skb, char *param, char *value);
-int irlan_insert_array_param(struct sk_buff *skb, char *name, __u8 *value, 
-			     __u16 value_len);
-
-int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len);
-
-#endif
-
-
diff --git a/include/net/irda/irlan_eth.h b/include/net/irda/irlan_eth.h
deleted file mode 100644
index de5c81691f33..000000000000
--- a/include/net/irda/irlan_eth.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlan_eth.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Thu Oct 15 08:36:58 1998
- * Modified at:   Fri May 14 23:29:00 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- *      
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *  
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *     
- ********************************************************************/
-
-#ifndef IRLAN_ETH_H
-#define IRLAN_ETH_H
-
-struct net_device *alloc_irlandev(const char *name);
-int  irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb);
-
-void irlan_eth_flow_indication( void *instance, void *sap, LOCAL_FLOW flow);
-#endif
diff --git a/include/net/irda/irlan_event.h b/include/net/irda/irlan_event.h
deleted file mode 100644
index 018b5a77e610..000000000000
--- a/include/net/irda/irlan_event.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlan_event.h
- * Version:       
- * Description:   LAN access
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:37 1997
- * Modified at:   Tue Feb  2 09:45:17 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1997 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRLAN_EVENT_H
-#define IRLAN_EVENT_H
-
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-
-#include <net/irda/irlan_common.h>
-
-typedef enum {
-	IRLAN_IDLE,
-	IRLAN_QUERY,
-	IRLAN_CONN, 
-	IRLAN_INFO,
-	IRLAN_MEDIA,
-	IRLAN_OPEN,
-	IRLAN_WAIT,
-	IRLAN_ARB, 
-	IRLAN_DATA,
-	IRLAN_CLOSE,
-	IRLAN_SYNC
-} IRLAN_STATE;
-
-typedef enum {
-	IRLAN_DISCOVERY_INDICATION,
-	IRLAN_IAS_PROVIDER_AVAIL,
-	IRLAN_IAS_PROVIDER_NOT_AVAIL,
-	IRLAN_LAP_DISCONNECT,
-	IRLAN_LMP_DISCONNECT,
-	IRLAN_CONNECT_COMPLETE,
-	IRLAN_DATA_INDICATION,
-	IRLAN_DATA_CONNECT_INDICATION,
-	IRLAN_RETRY_CONNECT,
-
-	IRLAN_CONNECT_INDICATION,
-	IRLAN_GET_INFO_CMD,
-	IRLAN_GET_MEDIA_CMD,
-	IRLAN_OPEN_DATA_CMD,
-	IRLAN_FILTER_CONFIG_CMD,
-
-	IRLAN_CHECK_CON_ARB,
-	IRLAN_PROVIDER_SIGNAL,
-
-	IRLAN_WATCHDOG_TIMEOUT,
-} IRLAN_EVENT;
-
-extern const char * const irlan_state[];
-
-void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, 
-			   struct sk_buff *skb);
-
-void irlan_do_provider_event(struct irlan_cb *self, IRLAN_EVENT event, 
-			     struct sk_buff *skb);
-
-void irlan_next_client_state(struct irlan_cb *self, IRLAN_STATE state);
-void irlan_next_provider_state(struct irlan_cb *self, IRLAN_STATE state);
-
-#endif
diff --git a/include/net/irda/irlan_filter.h b/include/net/irda/irlan_filter.h
deleted file mode 100644
index a5a2539485bd..000000000000
--- a/include/net/irda/irlan_filter.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlan_filter.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Fri Jan 29 15:24:08 1999
- * Modified at:   Sun Feb  7 23:35:31 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998 Dag Brattli, All Rights Reserved.
- *      
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *  
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *     
- ********************************************************************/
-
-#ifndef IRLAN_FILTER_H
-#define IRLAN_FILTER_H
-
-void irlan_check_command_param(struct irlan_cb *self, char *param, 
-			       char *value);
-void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb);
-#ifdef CONFIG_PROC_FS
-void irlan_print_filter(struct seq_file *seq, int filter_type);
-#endif
-
-#endif /* IRLAN_FILTER_H */
diff --git a/include/net/irda/irlan_provider.h b/include/net/irda/irlan_provider.h
deleted file mode 100644
index 92f3b0e1029b..000000000000
--- a/include/net/irda/irlan_provider.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlan_provider.h
- * Version:       0.1
- * Description:   IrDA LAN access layer
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:37 1997
- * Modified at:   Sun May  9 12:26:11 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRLAN_SERVER_H
-#define IRLAN_SERVER_H
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-
-#include <net/irda/irlan_common.h>
-
-void irlan_provider_ctrl_disconnect_indication(void *instance, void *sap, 
-					       LM_REASON reason, 
-					       struct sk_buff *skb);
-
-
-void irlan_provider_connect_response(struct irlan_cb *, struct tsap_cb *);
-
-int irlan_parse_open_data_cmd(struct irlan_cb *self, struct sk_buff *skb);
-int irlan_provider_parse_command(struct irlan_cb *self, int cmd,
-				 struct sk_buff *skb);
-
-void irlan_provider_send_reply(struct irlan_cb *self, int command, 
-			       int ret_code);
-int irlan_provider_open_ctrl_tsap(struct irlan_cb *self);
-
-#endif
-
-
diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h
deleted file mode 100644
index 6f23e820618c..000000000000
--- a/include/net/irda/irlap.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlap.h
- * Version:       0.8
- * Description:   An IrDA LAP driver for Linux
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Aug  4 20:40:53 1997
- * Modified at:   Fri Dec 10 13:21:17 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRLAP_H
-#define IRLAP_H
-
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-
-#include <net/irda/irqueue.h>		/* irda_queue_t */
-#include <net/irda/qos.h>		/* struct qos_info */
-#include <net/irda/discovery.h>		/* discovery_t */
-#include <net/irda/irlap_event.h>	/* IRLAP_STATE, ... */
-#include <net/irda/irmod.h>		/* struct notify_t */
-
-#define CONFIG_IRDA_DYNAMIC_WINDOW 1
-
-#define LAP_RELIABLE   1
-#define LAP_UNRELIABLE 0
-
-#define LAP_ADDR_HEADER 1  /* IrLAP Address Header */
-#define LAP_CTRL_HEADER 1  /* IrLAP Control Header */
-
-/* May be different when we get VFIR */
-#define LAP_MAX_HEADER (LAP_ADDR_HEADER + LAP_CTRL_HEADER)
-
-/* Each IrDA device gets a random 32 bits IRLAP device address */
-#define LAP_ALEN 4
-
-#define BROADCAST  0xffffffff /* Broadcast device address */
-#define CBROADCAST 0xfe       /* Connection broadcast address */
-#define XID_FORMAT 0x01       /* Discovery XID format */
-
-/* Nobody seems to use this constant. */
-#define LAP_WINDOW_SIZE 8
-/* We keep the LAP queue very small to minimise the amount of buffering.
- * this improve latency and reduce resource consumption.
- * This work only because we have synchronous refilling of IrLAP through
- * the flow control mechanism (via scheduler and IrTTP).
- * 2 buffers is the minimum we can work with, one that we send while polling
- * IrTTP, and another to know that we should not send the pf bit.
- * Jean II */
-#define LAP_HIGH_THRESHOLD     2
-/* Some rare non TTP clients don't implement flow control, and
- * so don't comply with the above limit (and neither with this one).
- * For IAP and management, it doesn't matter, because they never transmit much.
- *.For IrLPT, this should be fixed.
- * - Jean II */
-#define LAP_MAX_QUEUE 10
-/* Please note that all IrDA management frames (LMP/TTP conn req/disc and
- * IAS queries) fall in the second category and are sent to LAP even if TTP
- * is stopped. This means that those frames will wait only a maximum of
- * two (2) data frames before beeing sent on the "wire", which speed up
- * new socket setup when the link is saturated.
- * Same story for two sockets competing for the medium : if one saturates
- * the LAP, when the other want to transmit it only has to wait for
- * maximum three (3) packets (2 + one scheduling), which improve performance
- * of delay sensitive applications.
- * Jean II */
-
-#define NR_EXPECTED     1
-#define NR_UNEXPECTED   0
-#define NR_INVALID     -1
-
-#define NS_EXPECTED     1
-#define NS_UNEXPECTED   0
-#define NS_INVALID     -1
-
-/*
- *  Meta information passed within the IrLAP state machine
- */
-struct irlap_info {
-	__u8 caddr;   /* Connection address */
-	__u8 control; /* Frame type */
-        __u8 cmd;
-
-	__u32 saddr;
-	__u32 daddr;
-	
-	int pf;        /* Poll/final bit set */
-
-	__u8  nr;      /* Sequence number of next frame expected */
-	__u8  ns;      /* Sequence number of frame sent */
-
-	int  S;        /* Number of slots */
-	int  slot;     /* Random chosen slot */
-	int  s;        /* Current slot */
-
-	discovery_t *discovery; /* Discovery information */
-};
-
-/* Main structure of IrLAP */
-struct irlap_cb {
-	irda_queue_t q;     /* Must be first */
-	magic_t magic;
-
-	/* Device we are attached to */
-	struct net_device  *netdev;
-	char		hw_name[2*IFNAMSIZ + 1];
-
-	/* Connection state */
-	volatile IRLAP_STATE state;       /* Current state */
-
-	/* Timers used by IrLAP */
-	struct timer_list query_timer;
-	struct timer_list slot_timer;
-	struct timer_list discovery_timer;
-	struct timer_list final_timer;
-	struct timer_list poll_timer;
-	struct timer_list wd_timer;
-	struct timer_list backoff_timer;
-
-	/* Media busy stuff */
-	struct timer_list media_busy_timer;
-	int media_busy;
-
-	/* Timeouts which will be different with different turn time */
-	int slot_timeout;
-	int poll_timeout;
-	int final_timeout;
-	int wd_timeout;
-
-	struct sk_buff_head txq;  /* Frames to be transmitted */
-	struct sk_buff_head txq_ultra;
-
- 	__u8    caddr;        /* Connection address */
-	__u32   saddr;        /* Source device address */
-	__u32   daddr;        /* Destination device address */
-
-	int     retry_count;  /* Times tried to establish connection */
-	int     add_wait;     /* True if we are waiting for frame */
-
-	__u8    connect_pending;
-	__u8    disconnect_pending;
-
-	/*  To send a faster RR if tx queue empty */
-#ifdef CONFIG_IRDA_FAST_RR
-	int     fast_RR_timeout;
-	int     fast_RR;      
-#endif /* CONFIG_IRDA_FAST_RR */
-	
-	int N1; /* N1 * F-timer = Negitiated link disconnect warning threshold */
-	int N2; /* N2 * F-timer = Negitiated link disconnect time */
-	int N3; /* Connection retry count */
-
-	int     local_busy;
-	int     remote_busy;
-	int     xmitflag;
-
-	__u8    vs;            /* Next frame to be sent */
-	__u8    vr;            /* Next frame to be received */
-	__u8    va;            /* Last frame acked */
- 	int     window;        /* Nr of I-frames allowed to send */
-	int     window_size;   /* Current negotiated window size */
-
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
-	__u32   line_capacity; /* Number of bytes allowed to send */
-	__u32   bytes_left;    /* Number of bytes still allowed to transmit */
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-
-	struct sk_buff_head wx_list;
-
-	__u8    ack_required;
-	
-	/* XID parameters */
- 	__u8    S;           /* Number of slots */
-	__u8    slot;        /* Random chosen slot */
- 	__u8    s;           /* Current slot */
-	int     frame_sent;  /* Have we sent reply? */
-
-	hashbin_t   *discovery_log;
- 	discovery_t *discovery_cmd;
-
-	__u32 speed;		/* Link speed */
-
-	struct qos_info  qos_tx;   /* QoS requested by peer */
-	struct qos_info  qos_rx;   /* QoS requested by self */
-	struct qos_info *qos_dev;  /* QoS supported by device */
-
-	notify_t notify; /* Callbacks to IrLMP */
-
-	int    mtt_required;  /* Minimum turnaround time required */
-	int    xbofs_delay;   /* Nr of XBOF's used to MTT */
-	int    bofs_count;    /* Negotiated extra BOFs */
-	int    next_bofs;     /* Negotiated extra BOFs after next frame */
-
-	int    mode;     /* IrLAP mode (primary, secondary or monitor) */
-};
-
-/* 
- *  Function prototypes 
- */
-int irlap_init(void);
-void irlap_cleanup(void);
-
-struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
-			    const char *hw_name);
-void irlap_close(struct irlap_cb *self);
-
-void irlap_connect_request(struct irlap_cb *self, __u32 daddr, 
-			   struct qos_info *qos, int sniff);
-void irlap_connect_response(struct irlap_cb *self, struct sk_buff *skb);
-void irlap_connect_indication(struct irlap_cb *self, struct sk_buff *skb);
-void irlap_connect_confirm(struct irlap_cb *, struct sk_buff *skb);
-
-void irlap_data_indication(struct irlap_cb *, struct sk_buff *, int unreliable);
-void irlap_data_request(struct irlap_cb *, struct sk_buff *, int unreliable);
-
-#ifdef CONFIG_IRDA_ULTRA
-void irlap_unitdata_request(struct irlap_cb *, struct sk_buff *);
-void irlap_unitdata_indication(struct irlap_cb *, struct sk_buff *);
-#endif /* CONFIG_IRDA_ULTRA */
-
-void irlap_disconnect_request(struct irlap_cb *);
-void irlap_disconnect_indication(struct irlap_cb *, LAP_REASON reason);
-
-void irlap_status_indication(struct irlap_cb *, int quality_of_link);
-
-void irlap_test_request(__u8 *info, int len);
-
-void irlap_discovery_request(struct irlap_cb *, discovery_t *discovery);
-void irlap_discovery_confirm(struct irlap_cb *, hashbin_t *discovery_log);
-void irlap_discovery_indication(struct irlap_cb *, discovery_t *discovery);
-
-void irlap_reset_indication(struct irlap_cb *self);
-void irlap_reset_confirm(void);
-
-void irlap_update_nr_received(struct irlap_cb *, int nr);
-int irlap_validate_nr_received(struct irlap_cb *, int nr);
-int irlap_validate_ns_received(struct irlap_cb *, int ns);
-
-int  irlap_generate_rand_time_slot(int S, int s);
-void irlap_initiate_connection_state(struct irlap_cb *);
-void irlap_flush_all_queues(struct irlap_cb *);
-void irlap_wait_min_turn_around(struct irlap_cb *, struct qos_info *);
-
-void irlap_apply_default_connection_parameters(struct irlap_cb *self);
-void irlap_apply_connection_parameters(struct irlap_cb *self, int now);
-
-#define IRLAP_GET_HEADER_SIZE(self) (LAP_MAX_HEADER)
-#define IRLAP_GET_TX_QUEUE_LEN(self) skb_queue_len(&self->txq)
-
-/* Return TRUE if the node is in primary mode (i.e. master)
- * - Jean II */
-static inline int irlap_is_primary(struct irlap_cb *self)
-{
-	int ret;
-	switch(self->state) {
-	case LAP_XMIT_P:
-	case LAP_NRM_P:
-		ret = 1;
-		break;
-	case LAP_XMIT_S:
-	case LAP_NRM_S:
-		ret = 0;
-		break;
-	default:
-		ret = -1;
-	}
-	return ret;
-}
-
-/* Clear a pending IrLAP disconnect. - Jean II */
-static inline void irlap_clear_disconnect(struct irlap_cb *self)
-{
-	self->disconnect_pending = FALSE;
-}
-
-/*
- * Function irlap_next_state (self, state)
- *
- *    Switches state and provides debug information
- *
- */
-static inline void irlap_next_state(struct irlap_cb *self, IRLAP_STATE state)
-{
-	/*
-	if (!self || self->magic != LAP_MAGIC)
-		return;
-
-		pr_debug("next LAP state = %s\n", irlap_state[state]);
-	*/
-	self->state = state;
-}
-
-#endif
diff --git a/include/net/irda/irlap_event.h b/include/net/irda/irlap_event.h
deleted file mode 100644
index e4325fee1267..000000000000
--- a/include/net/irda/irlap_event.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*********************************************************************
- *                
- *                
- * Filename:      irlap_event.h
- * Version:       0.1
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sat Aug 16 00:59:29 1997
- * Modified at:   Tue Dec 21 11:20:30 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRLAP_EVENT_H
-#define IRLAP_EVENT_H
-
-#include <net/irda/irda.h>
-
-/* A few forward declarations (to make compiler happy) */
-struct irlap_cb;
-struct irlap_info;
-
-/* IrLAP States */
-typedef enum {
-	LAP_NDM,         /* Normal disconnected mode */
-	LAP_QUERY,
-	LAP_REPLY,
-	LAP_CONN,        /* Connect indication */
-	LAP_SETUP,       /* Setting up connection */
-	LAP_OFFLINE,     /* A really boring state */
-	LAP_XMIT_P,
-	LAP_PCLOSE,
-	LAP_NRM_P,       /* Normal response mode as primary */
-	LAP_RESET_WAIT,
-	LAP_RESET,
-	LAP_NRM_S,       /* Normal response mode as secondary */
-	LAP_XMIT_S,
-	LAP_SCLOSE,
-	LAP_RESET_CHECK,
-} IRLAP_STATE;
-
-/* IrLAP Events */
-typedef enum {
-	/* Services events */
-	DISCOVERY_REQUEST,
-	CONNECT_REQUEST,
-	CONNECT_RESPONSE,
-	DISCONNECT_REQUEST,
-	DATA_REQUEST,
-	RESET_REQUEST,
-	RESET_RESPONSE,
-
-	/* Send events */
-	SEND_I_CMD,
-	SEND_UI_FRAME,
-
-	/* Receive events */
-	RECV_DISCOVERY_XID_CMD,
-	RECV_DISCOVERY_XID_RSP,
-	RECV_SNRM_CMD,
-	RECV_TEST_CMD,
-	RECV_TEST_RSP,
-	RECV_UA_RSP,
-	RECV_DM_RSP,
-	RECV_RD_RSP,
-	RECV_I_CMD,
-	RECV_I_RSP,
-	RECV_UI_FRAME,
-	RECV_FRMR_RSP,
-	RECV_RR_CMD,
-	RECV_RR_RSP,
-	RECV_RNR_CMD,
-	RECV_RNR_RSP,
-	RECV_REJ_CMD,
-	RECV_REJ_RSP,
-	RECV_SREJ_CMD,
-	RECV_SREJ_RSP,
-	RECV_DISC_CMD,
-
-	/* Timer events */
-	SLOT_TIMER_EXPIRED,
-	QUERY_TIMER_EXPIRED,
-	FINAL_TIMER_EXPIRED,
-	POLL_TIMER_EXPIRED,
-	DISCOVERY_TIMER_EXPIRED,
-	WD_TIMER_EXPIRED,
-	BACKOFF_TIMER_EXPIRED,
-	MEDIA_BUSY_TIMER_EXPIRED,
-} IRLAP_EVENT;
-
-/*
- * Disconnect reason code
- */
-typedef enum { /* FIXME check the two first reason codes */
-	LAP_DISC_INDICATION=1, /* Received a disconnect request from peer */
-	LAP_NO_RESPONSE,       /* To many retransmits without response */
-	LAP_RESET_INDICATION,  /* To many retransmits, or invalid nr/ns */
-	LAP_FOUND_NONE,        /* No devices were discovered */
-	LAP_MEDIA_BUSY,
-	LAP_PRIMARY_CONFLICT,
-} LAP_REASON;
-
-extern const char *const irlap_state[];
-
-void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, 
-		    struct sk_buff *skb, struct irlap_info *info);
-void irlap_print_event(IRLAP_EVENT event);
-
-int irlap_qos_negotiate(struct irlap_cb *self, struct sk_buff *skb);
-
-#endif
diff --git a/include/net/irda/irlap_frame.h b/include/net/irda/irlap_frame.h
deleted file mode 100644
index cbc12a926e5f..000000000000
--- a/include/net/irda/irlap_frame.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlap_frame.h
- * Version:       0.9
- * Description:   IrLAP frame declarations
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Aug 19 10:27:26 1997
- * Modified at:   Sat Dec 25 21:07:26 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1997-1999 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRLAP_FRAME_H
-#define IRLAP_FRAME_H
-
-#include <linux/skbuff.h>
-
-#include <net/irda/irda.h>
-
-/* A few forward declarations (to make compiler happy) */
-struct irlap_cb;
-struct discovery_t;
-
-/* Frame types and templates */
-#define INVALID   0xff
-
-/* Unnumbered (U) commands */
-#define SNRM_CMD  0x83 /* Set Normal Response Mode */
-#define DISC_CMD  0x43 /* Disconnect */
-#define XID_CMD   0x2f /* Exchange Station Identification */
-#define TEST_CMD  0xe3 /* Test */
-
-/* Unnumbered responses */
-#define RNRM_RSP  0x83 /* Request Normal Response Mode */
-#define UA_RSP    0x63 /* Unnumbered Acknowledgement */
-#define FRMR_RSP  0x87 /* Frame Reject */
-#define DM_RSP    0x0f /* Disconnect Mode */
-#define RD_RSP    0x43 /* Request Disconnection */
-#define XID_RSP   0xaf /* Exchange Station Identification */
-#define TEST_RSP  0xe3 /* Test frame */
-
-/* Supervisory (S) */
-#define RR        0x01 /* Receive Ready */
-#define REJ       0x09 /* Reject */
-#define RNR       0x05 /* Receive Not Ready */
-#define SREJ      0x0d /* Selective Reject */
-
-/* Information (I) */
-#define I_FRAME   0x00 /* Information Format */
-#define UI_FRAME  0x03 /* Unnumbered Information */
-
-#define CMD_FRAME 0x01
-#define RSP_FRAME 0x00
-
-#define PF_BIT    0x10 /* Poll/final bit */
-
-/* Some IrLAP field lengths */
-/*
- * Only baud rate triplet is 4 bytes (PV can be 2 bytes).
- * All others params (7) are 3 bytes, so that's 7*3 + 1*4 bytes.
- */
-#define IRLAP_NEGOCIATION_PARAMS_LEN 25
-#define IRLAP_DISCOVERY_INFO_LEN     32
-
-struct disc_frame {
-	__u8 caddr;          /* Connection address */
-	__u8 control;
-} __packed;
-
-struct xid_frame {
-	__u8  caddr; /* Connection address */
-	__u8  control;
-	__u8  ident; /* Should always be XID_FORMAT */ 
-	__le32 saddr; /* Source device address */
-	__le32 daddr; /* Destination device address */
-	__u8  flags; /* Discovery flags */
-	__u8  slotnr;
-	__u8  version;
-} __packed;
-
-struct test_frame {
-	__u8 caddr;          /* Connection address */
-	__u8 control;
-	__le32 saddr;         /* Source device address */
-	__le32 daddr;         /* Destination device address */
-} __packed;
-
-struct ua_frame {
-	__u8 caddr;
-	__u8 control;
-	__le32 saddr; /* Source device address */
-	__le32 daddr; /* Dest device address */
-} __packed;
-
-struct dm_frame {
-	__u8 caddr;          /* Connection address */
-	__u8 control;
-} __packed;
-
-struct rd_frame {
-	__u8 caddr;          /* Connection address */
-	__u8 control;
-} __packed;
-
-struct rr_frame {
-	__u8 caddr;          /* Connection address */
-	__u8 control;
-} __packed;
-
-struct i_frame {
-	__u8 caddr;
-	__u8 control;
-} __packed;
-
-struct snrm_frame {
-	__u8  caddr;
-	__u8  control;
-	__le32 saddr;
-	__le32 daddr;
-	__u8  ncaddr;
-} __packed;
-
-void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb);
-void irlap_send_discovery_xid_frame(struct irlap_cb *, int S, __u8 s, 
-				    __u8 command,
-				    struct discovery_t *discovery);
-void irlap_send_snrm_frame(struct irlap_cb *, struct qos_info *);
-void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, 
-			   struct sk_buff *cmd);
-void irlap_send_ua_response_frame(struct irlap_cb *, struct qos_info *);
-void irlap_send_dm_frame(struct irlap_cb *self);
-void irlap_send_rd_frame(struct irlap_cb *self);
-void irlap_send_disc_frame(struct irlap_cb *self);
-void irlap_send_rr_frame(struct irlap_cb *self, int command);
-
-void irlap_send_data_primary(struct irlap_cb *, struct sk_buff *);
-void irlap_send_data_primary_poll(struct irlap_cb *, struct sk_buff *);
-void irlap_send_data_secondary(struct irlap_cb *, struct sk_buff *);
-void irlap_send_data_secondary_final(struct irlap_cb *, struct sk_buff *);
-void irlap_resend_rejected_frames(struct irlap_cb *, int command);
-void irlap_resend_rejected_frame(struct irlap_cb *self, int command);
-
-void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb,
-			 __u8 caddr, int command);
-
-int irlap_insert_qos_negotiation_params(struct irlap_cb *self,
-					struct sk_buff *skb);
-
-#endif
diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h
deleted file mode 100644
index f132924cc9da..000000000000
--- a/include/net/irda/irlmp.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlmp.h
- * Version:       0.9
- * Description:   IrDA Link Management Protocol (LMP) layer
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 17 20:54:32 1997
- * Modified at:   Fri Dec 10 13:23:01 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRLMP_H
-#define IRLMP_H
-
-#include <asm/param.h>  /* for HZ */
-
-#include <linux/types.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/qos.h>
-#include <net/irda/irlap.h>		/* LAP_MAX_HEADER, ... */
-#include <net/irda/irlmp_event.h>
-#include <net/irda/irqueue.h>
-#include <net/irda/discovery.h>
-
-/* LSAP-SEL's */
-#define LSAP_MASK     0x7f
-#define LSAP_IAS      0x00
-#define LSAP_ANY      0xff
-#define LSAP_MAX      0x6f /* 0x70-0x7f are reserved */
-#define LSAP_CONNLESS 0x70 /* Connectionless LSAP, mostly used for Ultra */
-
-#define DEV_ADDR_ANY  0xffffffff
-
-#define LMP_HEADER          2    /* Dest LSAP + Source LSAP */
-#define LMP_CONTROL_HEADER  4    /* LMP_HEADER + opcode + parameter */
-#define LMP_PID_HEADER      1    /* Used by Ultra */
-#define LMP_MAX_HEADER      (LMP_CONTROL_HEADER+LAP_MAX_HEADER)
-
-#define LM_MAX_CONNECTIONS  10
-
-#define LM_IDLE_TIMEOUT     2*HZ /* 2 seconds for now */
-
-typedef enum {
-	S_PNP = 0,
-	S_PDA,
-	S_COMPUTER,
-	S_PRINTER,
-	S_MODEM,
-	S_FAX,
-	S_LAN,
-	S_TELEPHONY,
-	S_COMM,
-	S_OBEX,
-	S_ANY,
-	S_END,
-} SERVICE;
-
-/* For selective discovery */
-typedef void (*DISCOVERY_CALLBACK1) (discinfo_t *, DISCOVERY_MODE, void *);
-/* For expiry (the same) */
-typedef void (*DISCOVERY_CALLBACK2) (discinfo_t *, DISCOVERY_MODE, void *);
-
-typedef struct {
-	irda_queue_t queue; /* Must be first */
-
-	__u16_host_order hints; /* Hint bits */
-} irlmp_service_t;
-
-typedef struct {
-	irda_queue_t queue; /* Must be first */
-
-	__u16_host_order hint_mask;
-
-	DISCOVERY_CALLBACK1 disco_callback;	/* Selective discovery */
-	DISCOVERY_CALLBACK2 expir_callback;	/* Selective expiration */
-	void *priv;                /* Used to identify client */
-} irlmp_client_t;
-
-/*
- *  Information about each logical LSAP connection
- */
-struct lsap_cb {
-	irda_queue_t queue;      /* Must be first */
-	magic_t magic;
-
-	unsigned long connected;	/* set_bit used on this */
-	int  persistent;
-
-	__u8 slsap_sel;   /* Source (this) LSAP address */
-	__u8 dlsap_sel;   /* Destination LSAP address (if connected) */
-#ifdef CONFIG_IRDA_ULTRA
-	__u8 pid;         /* Used by connectionless LSAP */
-#endif /* CONFIG_IRDA_ULTRA */
-	struct sk_buff *conn_skb; /* Store skb here while connecting */
-
-	struct timer_list watchdog_timer;
-
-	LSAP_STATE      lsap_state;  /* Connection state */
-	notify_t        notify;      /* Indication/Confirm entry points */
-	struct qos_info qos;         /* QoS for this connection */
-
-	struct lap_cb *lap; /* Pointer to LAP connection structure */
-};
-
-/*
- *  Used for caching the last slsap->dlsap->handle mapping
- *
- * We don't need to keep/match the remote address in the cache because
- * we are associated with a specific LAP (which implies it).
- * Jean II
- */
-typedef struct {
-	int valid;
-
-	__u8 slsap_sel;
-	__u8 dlsap_sel;
-	struct lsap_cb *lsap;
-} CACHE_ENTRY;
-
-/*
- *  Information about each registered IrLAP layer
- */
-struct lap_cb {
-	irda_queue_t queue; /* Must be first */
-	magic_t magic;
-
-	int reason;    /* LAP disconnect reason */
-
-	IRLMP_STATE lap_state;
-
-	struct irlap_cb *irlap;   /* Instance of IrLAP layer */
-	hashbin_t *lsaps;         /* LSAP associated with this link */
-	struct lsap_cb *flow_next;	/* Next lsap to be polled for Tx */
-
-	__u8  caddr;  /* Connection address */
- 	__u32 saddr;  /* Source device address */
- 	__u32 daddr;  /* Destination device address */
-	
-	struct qos_info *qos;  /* LAP QoS for this session */
-	struct timer_list idle_timer;
-	
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-	/* The lsap cache was moved from struct irlmp_cb to here because
-	 * it must be associated with the specific LAP. Also, this
-	 * improves performance. - Jean II */
-	CACHE_ENTRY cache;  /* Caching last slsap->dlsap->handle mapping */
-#endif
-};
-
-/*
- *  Main structure for IrLMP
- */
-struct irlmp_cb {
-	magic_t magic;
-
-	__u8 conflict_flag;
-	
-	discovery_t discovery_cmd; /* Discovery command to use by IrLAP */
-	discovery_t discovery_rsp; /* Discovery response to use by IrLAP */
-
-	/* Last lsap picked automatically by irlmp_find_free_slsap() */
-	int	last_lsap_sel;
-
-	struct timer_list discovery_timer;
-
- 	hashbin_t *links;         /* IrLAP connection table */
-	hashbin_t *unconnected_lsaps;
- 	hashbin_t *clients;
-	hashbin_t *services;
-
-	hashbin_t *cachelog;	/* Current discovery log */
-
-	int running;
-
-	__u16_host_order hints; /* Hint bits */
-};
-
-/* Prototype declarations */
-int  irlmp_init(void);
-void irlmp_cleanup(void);
-struct lsap_cb *irlmp_open_lsap(__u8 slsap, notify_t *notify, __u8 pid);
-void irlmp_close_lsap( struct lsap_cb *self);
-
-__u16 irlmp_service_to_hint(int service);
-void *irlmp_register_service(__u16 hints);
-int irlmp_unregister_service(void *handle);
-void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb,
-			    DISCOVERY_CALLBACK2 expir_clb, void *priv);
-int irlmp_unregister_client(void *handle);
-int irlmp_update_client(void *handle, __u16 hint_mask, 
-			DISCOVERY_CALLBACK1 disco_clb,
-			DISCOVERY_CALLBACK2 expir_clb, void *priv);
-
-void irlmp_register_link(struct irlap_cb *, __u32 saddr, notify_t *);
-void irlmp_unregister_link(__u32 saddr);
-
-int  irlmp_connect_request(struct lsap_cb *, __u8 dlsap_sel, 
-			   __u32 saddr, __u32 daddr,
-			   struct qos_info *, struct sk_buff *);
-void irlmp_connect_indication(struct lsap_cb *self, struct sk_buff *skb);
-int  irlmp_connect_response(struct lsap_cb *, struct sk_buff *);
-void irlmp_connect_confirm(struct lsap_cb *, struct sk_buff *);
-struct lsap_cb *irlmp_dup(struct lsap_cb *self, void *instance);
-
-void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, 
-				 struct sk_buff *userdata);
-int  irlmp_disconnect_request(struct lsap_cb *, struct sk_buff *userdata);
-
-void irlmp_discovery_confirm(hashbin_t *discovery_log, DISCOVERY_MODE mode);
-void irlmp_discovery_request(int nslots);
-discinfo_t *irlmp_get_discoveries(int *pn, __u16 mask, int nslots);
-void irlmp_do_expiry(void);
-void irlmp_do_discovery(int nslots);
-discovery_t *irlmp_get_discovery_response(void);
-void irlmp_discovery_expiry(discinfo_t *expiry, int number);
-
-int  irlmp_data_request(struct lsap_cb *, struct sk_buff *);
-void irlmp_data_indication(struct lsap_cb *, struct sk_buff *);
-
-int  irlmp_udata_request(struct lsap_cb *, struct sk_buff *);
-void irlmp_udata_indication(struct lsap_cb *, struct sk_buff *);
-
-#ifdef CONFIG_IRDA_ULTRA
-int  irlmp_connless_data_request(struct lsap_cb *, struct sk_buff *, __u8);
-void irlmp_connless_data_indication(struct lsap_cb *, struct sk_buff *);
-#endif /* CONFIG_IRDA_ULTRA */
-
-void irlmp_status_indication(struct lap_cb *, LINK_STATUS link, LOCK_STATUS lock);
-void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow);
-
-LM_REASON irlmp_convert_lap_reason(LAP_REASON);
-
-static inline __u32 irlmp_get_saddr(const struct lsap_cb *self)
-{
-	return (self && self->lap) ? self->lap->saddr : 0;
-}
-
-static inline __u32 irlmp_get_daddr(const struct lsap_cb *self)
-{
-	return (self && self->lap) ? self->lap->daddr : 0;
-}
-
-const char *irlmp_reason_str(LM_REASON reason);
-
-extern int sysctl_discovery_timeout;
-extern int sysctl_discovery_slots;
-extern int sysctl_discovery;
-extern int sysctl_lap_keepalive_time;	/* in ms, default is LM_IDLE_TIMEOUT */
-extern struct irlmp_cb *irlmp;
-
-/* Check if LAP queue is full.
- * Used by IrTTP for low control, see comments in irlap.h - Jean II */
-static inline int irlmp_lap_tx_queue_full(struct lsap_cb *self)
-{
-	if (self == NULL)
-		return 0;
-	if (self->lap == NULL)
-		return 0;
-	if (self->lap->irlap == NULL)
-		return 0;
-
-	return IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD;
-}
-
-/* After doing a irlmp_dup(), this get one of the two socket back into
- * a state where it's waiting incoming connections.
- * Note : this can be used *only* if the socket is not yet connected
- * (i.e. NO irlmp_connect_response() done on this socket).
- * - Jean II */
-static inline void irlmp_listen(struct lsap_cb *self)
-{
-	self->dlsap_sel = LSAP_ANY;
-	self->lap = NULL;
-	self->lsap_state = LSAP_DISCONNECTED;
-	/* Started when we received the LM_CONNECT_INDICATION */
-	del_timer(&self->watchdog_timer);
-}
-
-#endif
diff --git a/include/net/irda/irlmp_event.h b/include/net/irda/irlmp_event.h
deleted file mode 100644
index 9e4ec17a7449..000000000000
--- a/include/net/irda/irlmp_event.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlmp_event.h
- * Version:       0.1
- * Description:   IrDA-LMP event handling
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Aug  4 20:40:53 1997
- * Modified at:   Thu Jul  8 12:18:54 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRLMP_EVENT_H
-#define IRLMP_EVENT_H
-
-/* A few forward declarations (to make compiler happy) */
-struct irlmp_cb;
-struct lsap_cb;
-struct lap_cb;
-struct discovery_t;
-
-/* LAP states */
-typedef enum {
-	/* IrLAP connection control states */
-	LAP_STANDBY,             /* No LAP connection */
-	LAP_U_CONNECT,           /* Starting LAP connection */
-	LAP_ACTIVE,              /* LAP connection is active */
-} IRLMP_STATE;
-
-/* LSAP connection control states */
-typedef enum {
-	LSAP_DISCONNECTED,        /* No LSAP connection */
-	LSAP_CONNECT,             /* Connect indication from peer */
-	LSAP_CONNECT_PEND,        /* Connect request from service user */
-	LSAP_DATA_TRANSFER_READY, /* LSAP connection established */          
-	LSAP_SETUP,               /* Trying to set up LSAP connection */
-	LSAP_SETUP_PEND,          /* Request to start LAP connection */
-} LSAP_STATE;
-
-typedef enum {
-	/* LSAP events */
- 	LM_CONNECT_REQUEST,
- 	LM_CONNECT_CONFIRM,
-	LM_CONNECT_RESPONSE,
- 	LM_CONNECT_INDICATION, 	
-	
-	LM_DISCONNECT_INDICATION,
-	LM_DISCONNECT_REQUEST,
-
- 	LM_DATA_REQUEST,
-	LM_UDATA_REQUEST,
- 	LM_DATA_INDICATION,
-	LM_UDATA_INDICATION,
-
-	LM_WATCHDOG_TIMEOUT,
-
-	/* IrLAP events */
-	LM_LAP_CONNECT_REQUEST,
- 	LM_LAP_CONNECT_INDICATION, 
- 	LM_LAP_CONNECT_CONFIRM,
- 	LM_LAP_DISCONNECT_INDICATION, 
-	LM_LAP_DISCONNECT_REQUEST,
-	LM_LAP_DISCOVERY_REQUEST,
- 	LM_LAP_DISCOVERY_CONFIRM,
-	LM_LAP_IDLE_TIMEOUT,
-} IRLMP_EVENT;
-
-extern const char *const irlmp_state[];
-extern const char *const irlsap_state[];
-
-void irlmp_watchdog_timer_expired(void *data);
-void irlmp_discovery_timer_expired(void *data);
-void irlmp_idle_timer_expired(void *data);
-
-void irlmp_do_lap_event(struct lap_cb *self, IRLMP_EVENT event, 
-			struct sk_buff *skb);
-int irlmp_do_lsap_event(struct lsap_cb *self, IRLMP_EVENT event, 
-			struct sk_buff *skb);
-
-#endif /* IRLMP_EVENT_H */
-
-
-
-
diff --git a/include/net/irda/irlmp_frame.h b/include/net/irda/irlmp_frame.h
deleted file mode 100644
index 1906eb71422e..000000000000
--- a/include/net/irda/irlmp_frame.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irlmp_frame.h
- * Version:       0.9
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Aug 19 02:09:59 1997
- * Modified at:   Fri Dec 10 13:21:53 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRMLP_FRAME_H
-#define IRMLP_FRAME_H
-
-#include <linux/skbuff.h>
-
-#include <net/irda/discovery.h>
-
-/* IrLMP frame opcodes */
-#define CONNECT_CMD    0x01
-#define CONNECT_CNF    0x81
-#define DISCONNECT     0x02
-#define ACCESSMODE_CMD 0x03
-#define ACCESSMODE_CNF 0x83
-
-#define CONTROL_BIT    0x80
-
-void irlmp_send_data_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap,
-				int expedited, struct sk_buff *skb);
-void irlmp_send_lcf_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, 
-			__u8 opcode, struct sk_buff *skb);
-void irlmp_link_data_indication(struct lap_cb *, struct sk_buff *, 
-				int unreliable);
-#ifdef CONFIG_IRDA_ULTRA
-void irlmp_link_unitdata_indication(struct lap_cb *, struct sk_buff *);
-#endif /* CONFIG_IRDA_ULTRA */
-
-void irlmp_link_connect_indication(struct lap_cb *, __u32 saddr, __u32 daddr,
-				   struct qos_info *qos, struct sk_buff *skb);
-void irlmp_link_connect_request(__u32 daddr);
-void irlmp_link_connect_confirm(struct lap_cb *self, struct qos_info *qos, 
-				struct sk_buff *skb);
-void irlmp_link_disconnect_indication(struct lap_cb *, struct irlap_cb *, 
-				      LAP_REASON reason, struct sk_buff *); 
-void irlmp_link_discovery_confirm(struct lap_cb *self, hashbin_t *log);
-void irlmp_link_discovery_indication(struct lap_cb *, discovery_t *discovery);
-
-#endif
diff --git a/include/net/irda/irmod.h b/include/net/irda/irmod.h
deleted file mode 100644
index 86f0dbb8ee5d..000000000000
--- a/include/net/irda/irmod.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irmod.h
- * Version:       0.3
- * Description:   IrDA module and utilities functions
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Dec 15 13:58:52 1997
- * Modified at:   Fri Jan 28 13:15:24 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *      
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *  
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charg.
- *     
- ********************************************************************/
-
-#ifndef IRMOD_H
-#define IRMOD_H
-
-/* Misc status information */
-typedef enum {
-	STATUS_OK,
-	STATUS_ABORTED,
-	STATUS_NO_ACTIVITY,
-	STATUS_NOISY,
-	STATUS_REMOTE,
-} LINK_STATUS;
-
-typedef enum {
-	LOCK_NO_CHANGE,
-	LOCK_LOCKED,
-	LOCK_UNLOCKED,
-} LOCK_STATUS;
-
-typedef enum { FLOW_STOP, FLOW_START } LOCAL_FLOW;
-
-/*  
- *  IrLMP disconnect reasons. The order is very important, since they 
- *  correspond to disconnect reasons sent in IrLMP disconnect frames, so
- *  please do not touch :-)
- */
-typedef enum {
-	LM_USER_REQUEST = 1,  /* User request */
-	LM_LAP_DISCONNECT,    /* Unexpected IrLAP disconnect */
-	LM_CONNECT_FAILURE,   /* Failed to establish IrLAP connection */
-	LM_LAP_RESET,         /* IrLAP reset */
-	LM_INIT_DISCONNECT,   /* Link Management initiated disconnect */
-	LM_LSAP_NOTCONN,      /* Data delivered on unconnected LSAP */
-	LM_NON_RESP_CLIENT,   /* Non responsive LM-MUX client */
-	LM_NO_AVAIL_CLIENT,   /* No available LM-MUX client */
-	LM_CONN_HALF_OPEN,    /* Connection is half open */
-	LM_BAD_SOURCE_ADDR,   /* Illegal source address (i.e 0x00) */
-} LM_REASON;
-#define LM_UNKNOWN 0xff       /* Unspecified disconnect reason */
-
-/* A few forward declarations (to make compiler happy) */
-struct qos_info;		/* in <net/irda/qos.h> */
-
-/*
- *  Notify structure used between transport and link management layers
- */
-typedef struct {
-	int (*data_indication)(void *priv, void *sap, struct sk_buff *skb);
-	int (*udata_indication)(void *priv, void *sap, struct sk_buff *skb);
-	void (*connect_confirm)(void *instance, void *sap, 
-				struct qos_info *qos, __u32 max_sdu_size,
-				__u8 max_header_size, struct sk_buff *skb);
-	void (*connect_indication)(void *instance, void *sap, 
-				   struct qos_info *qos, __u32 max_sdu_size, 
-				   __u8 max_header_size, struct sk_buff *skb);
-	void (*disconnect_indication)(void *instance, void *sap, 
-				      LM_REASON reason, struct sk_buff *);
-	void (*flow_indication)(void *instance, void *sap, LOCAL_FLOW flow);
-	void (*status_indication)(void *instance,
-				  LINK_STATUS link, LOCK_STATUS lock);
-	void *instance; /* Layer instance pointer */
-	char name[16];  /* Name of layer */
-} notify_t;
-
-#define NOTIFY_MAX_NAME 16
-
-/* Zero the notify structure */
-void irda_notify_init(notify_t *notify);
-
-/* Locking wrapper - Note the inverted logic on irda_lock().
- * Those function basically return false if the lock is already in the
- * position you want to set it. - Jean II */
-#define irda_lock(lock)		(! test_and_set_bit(0, (void *) (lock)))
-#define irda_unlock(lock)	(test_and_clear_bit(0, (void *) (lock)))
-
-#endif /* IRMOD_H */
-
-
-
-
-
-
-
-
-
diff --git a/include/net/irda/irqueue.h b/include/net/irda/irqueue.h
deleted file mode 100644
index 37f512bd6733..000000000000
--- a/include/net/irda/irqueue.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irqueue.h
- * Version:       0.3
- * Description:   General queue implementation
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Jun  9 13:26:50 1998
- * Modified at:   Thu Oct  7 13:25:16 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (C) 1998-1999, Aage Kvalnes <aage@cs.uit.no>
- *     Copyright (c) 1998, Dag Brattli
- *     All Rights Reserved.
- *      
- *     This code is taken from the Vortex Operating System written by Aage
- *     Kvalnes and has been ported to Linux and Linux/IR by Dag Brattli
- *
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *  
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *     
- ********************************************************************/
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-
-#ifndef IRDA_QUEUE_H
-#define IRDA_QUEUE_H
-
-#define NAME_SIZE      32
-
-/*
- * Hash types (some flags can be xored)
- * See comments in irqueue.c for which one to use...
- */
-#define HB_NOLOCK	0	/* No concurent access prevention */
-#define HB_LOCK		1	/* Prevent concurent write with global lock */
-
-/*
- * Hash defines
- */
-#define HASHBIN_SIZE   8
-#define HASHBIN_MASK   0x7
-
-#ifndef IRDA_ALIGN 
-#define IRDA_ALIGN __attribute__((aligned))
-#endif
-
-#define Q_NULL { NULL, NULL, "", 0 }
-
-typedef void (*FREE_FUNC)(void *arg);
-
-struct irda_queue {
-	struct irda_queue *q_next;
-	struct irda_queue *q_prev;
-
-	char   q_name[NAME_SIZE];
-	long   q_hash;			/* Must be able to cast a (void *) */
-};
-typedef struct irda_queue irda_queue_t;
-
-typedef struct hashbin_t {
-	__u32      magic;
-	int        hb_type;
-	int        hb_size;
-	spinlock_t hb_spinlock;		/* HB_LOCK - Can be used by the user */
-
-	irda_queue_t* hb_queue[HASHBIN_SIZE] IRDA_ALIGN;
-
-	irda_queue_t* hb_current;
-} hashbin_t;
-
-hashbin_t *hashbin_new(int type);
-int      hashbin_delete(hashbin_t* hashbin, FREE_FUNC func);
-int      hashbin_clear(hashbin_t* hashbin, FREE_FUNC free_func);
-void     hashbin_insert(hashbin_t* hashbin, irda_queue_t* entry, long hashv, 
-			const char* name);
-void*    hashbin_remove(hashbin_t* hashbin, long hashv, const char* name);
-void*    hashbin_remove_first(hashbin_t *hashbin);
-void*	 hashbin_remove_this( hashbin_t* hashbin, irda_queue_t* entry);
-void*    hashbin_find(hashbin_t* hashbin, long hashv, const char* name);
-void*    hashbin_lock_find(hashbin_t* hashbin, long hashv, const char* name);
-void*    hashbin_find_next(hashbin_t* hashbin, long hashv, const char* name,
-			   void ** pnext);
-irda_queue_t *hashbin_get_first(hashbin_t *hashbin);
-irda_queue_t *hashbin_get_next(hashbin_t *hashbin);
-
-#define HASHBIN_GET_SIZE(hashbin) hashbin->hb_size
-
-#endif
diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h
deleted file mode 100644
index 98682d4bae8f..000000000000
--- a/include/net/irda/irttp.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      irttp.h
- * Version:       1.0
- * Description:   Tiny Transport Protocol (TTP) definitions
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:31 1997
- * Modified at:   Sun Dec 12 13:09:07 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef IRTTP_H
-#define IRTTP_H
-
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>		/* struct lsap_cb */
-#include <net/irda/qos.h>		/* struct qos_info */
-#include <net/irda/irqueue.h>
-
-#define TTP_MAX_CONNECTIONS    LM_MAX_CONNECTIONS
-#define TTP_HEADER             1
-#define TTP_MAX_HEADER         (TTP_HEADER + LMP_MAX_HEADER)
-#define TTP_SAR_HEADER         5
-#define TTP_PARAMETERS         0x80
-#define TTP_MORE               0x80
-
-/* Transmission queue sizes */
-/* Worst case scenario, two window of data - Jean II */
-#define TTP_TX_MAX_QUEUE	14
-/* We need to keep at least 5 frames to make sure that we can refill
- * appropriately the LAP layer. LAP keeps only two buffers, and we need
- * to have 7 to make a full window - Jean II */
-#define TTP_TX_LOW_THRESHOLD	5
-/* Most clients are synchronous with respect to flow control, so we can
- * keep a low number of Tx buffers in TTP - Jean II */
-#define TTP_TX_HIGH_THRESHOLD	7
-
-/* Receive queue sizes */
-/* Minimum of credit that the peer should hold.
- * If the peer has less credits than 9 frames, we will explicitly send
- * him some credits (through irttp_give_credit() and a specific frame).
- * Note that when we give credits it's likely that it won't be sent in
- * this LAP window, but in the next one. So, we make sure that the peer
- * has something to send while waiting for credits (one LAP window == 7
- * + 1 frames while he process the credits). - Jean II */
-#define TTP_RX_MIN_CREDIT	8
-/* This is the default maximum number of credits held by the peer, so the
- * default maximum number of frames he can send us before needing flow
- * control answer from us (this may be negociated differently at TSAP setup).
- * We want to minimise the number of times we have to explicitly send some
- * credit to the peer, hoping we can piggyback it on the return data. In
- * particular, it doesn't make sense for us to send credit more than once
- * per LAP window.
- * Moreover, giving credits has some latency, so we need strictly more than
- * a LAP window, otherwise we may already have credits in our Tx queue.
- * But on the other hand, we don't want to keep too many Rx buffer here
- * before starting to flow control the other end, so make it exactly one
- * LAP window + 1 + MIN_CREDITS. - Jean II */
-#define TTP_RX_DEFAULT_CREDIT	16
-/* Maximum number of credits we can allow the peer to have, and therefore
- * maximum Rx queue size.
- * Note that we try to deliver packets to the higher layer every time we
- * receive something, so in normal mode the Rx queue will never contains
- * more than one or two packets. - Jean II */
-#define TTP_RX_MAX_CREDIT	21
-
-/* What clients should use when calling ttp_open_tsap() */
-#define DEFAULT_INITIAL_CREDIT	TTP_RX_DEFAULT_CREDIT
-
-/* Some priorities for disconnect requests */
-#define P_NORMAL    0
-#define P_HIGH      1
-
-#define TTP_SAR_DISABLE 0
-#define TTP_SAR_UNBOUND 0xffffffff
-
-/* Parameters */
-#define TTP_MAX_SDU_SIZE 0x01
-
-/*
- *  This structure contains all data associated with one instance of a TTP 
- *  connection.
- */
-struct tsap_cb {
-	irda_queue_t q;            /* Must be first */
-	magic_t magic;        /* Just in case */
-
-	__u8 stsap_sel;       /* Source TSAP */
-	__u8 dtsap_sel;       /* Destination TSAP */
-
-	struct lsap_cb *lsap; /* Corresponding LSAP to this TSAP */
-
-	__u8 connected;       /* TSAP connected */
-	 
-	__u8 initial_credit;  /* Initial credit to give peer */
-
-        int avail_credit;    /* Available credit to return to peer */
-	int remote_credit;   /* Credit held by peer TTP entity */
-	int send_credit;     /* Credit held by local TTP entity */
-	
-	struct sk_buff_head tx_queue; /* Frames to be transmitted */
-	struct sk_buff_head rx_queue; /* Received frames */
-	struct sk_buff_head rx_fragments;
-	int tx_queue_lock;
-	int rx_queue_lock;
-	spinlock_t lock;
-
-	notify_t notify;       /* Callbacks to client layer */
-
-	struct net_device_stats stats;
-	struct timer_list todo_timer; 
-
-	__u32 max_seg_size;     /* Max data that fit into an IrLAP frame */
-	__u8  max_header_size;
-
-	int   rx_sdu_busy;     /* RxSdu.busy */
-	__u32 rx_sdu_size;     /* Current size of a partially received frame */
-	__u32 rx_max_sdu_size; /* Max receive user data size */
-
-	int tx_sdu_busy;       /* TxSdu.busy */
-	__u32 tx_max_sdu_size; /* Max transmit user data size */
-
-	int close_pend;        /* Close, but disconnect_pend */
-	unsigned long disconnect_pend; /* Disconnect, but still data to send */
-	struct sk_buff *disconnect_skb;
-};
-
-struct irttp_cb {
-	magic_t    magic;	
-	hashbin_t *tsaps;
-};
-
-int  irttp_init(void);
-void irttp_cleanup(void);
-
-struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify);
-int irttp_close_tsap(struct tsap_cb *self);
-
-int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb);
-int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb);
-
-int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, 
-			  __u32 saddr, __u32 daddr,
-			  struct qos_info *qos, __u32 max_sdu_size, 
-			  struct sk_buff *userdata);
-int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, 
-			    struct sk_buff *userdata);
-int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *skb,
-			     int priority);
-void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow);
-struct tsap_cb *irttp_dup(struct tsap_cb *self, void *instance);
-
-static inline __u32 irttp_get_saddr(struct tsap_cb *self)
-{
-	return irlmp_get_saddr(self->lsap);
-}
-
-static inline __u32 irttp_get_daddr(struct tsap_cb *self)
-{
-	return irlmp_get_daddr(self->lsap);
-}
-
-static inline __u32 irttp_get_max_seg_size(struct tsap_cb *self)
-{
-	return self->max_seg_size;
-}
-
-/* After doing a irttp_dup(), this get one of the two socket back into
- * a state where it's waiting incoming connections.
- * Note : this can be used *only* if the socket is not yet connected
- * (i.e. NO irttp_connect_response() done on this socket).
- * - Jean II */
-static inline void irttp_listen(struct tsap_cb *self)
-{
-	irlmp_listen(self->lsap);
-	self->dtsap_sel = LSAP_ANY;
-}
-
-/* Return TRUE if the node is in primary mode (i.e. master)
- * - Jean II */
-static inline int irttp_is_primary(struct tsap_cb *self)
-{
-	if ((self == NULL) ||
-	    (self->lsap == NULL) ||
-	    (self->lsap->lap == NULL) ||
-	    (self->lsap->lap->irlap == NULL))
-		return -2;
-	return irlap_is_primary(self->lsap->lap->irlap);
-}
-
-#endif /* IRTTP_H */
diff --git a/include/net/irda/parameters.h b/include/net/irda/parameters.h
deleted file mode 100644
index 2d9cd0007cba..000000000000
--- a/include/net/irda/parameters.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      parameters.h
- * Version:       1.0
- * Description:   A more general way to handle (pi,pl,pv) parameters
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Jun  7 08:47:28 1999
- * Modified at:   Sun Jan 30 14:05:14 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- *     Michel Dänzer <daenzer@debian.org>, 10/2001
- *     - simplify irda_pv_t to avoid endianness issues
- *     
- ********************************************************************/
-
-#ifndef IRDA_PARAMS_H
-#define IRDA_PARAMS_H
-
-/*
- *  The currently supported types. Beware not to change the sequence since
- *  it a good reason why the sized integers has a value equal to their size
- */
-typedef enum {
-	PV_INTEGER,      /* Integer of any (pl) length */
-	PV_INT_8_BITS,   /* Integer of 8 bits in length */
-	PV_INT_16_BITS,  /* Integer of 16 bits in length */
-	PV_STRING,       /* \0 terminated string */
-	PV_INT_32_BITS,  /* Integer of 32 bits in length */
-	PV_OCT_SEQ,      /* Octet sequence */
-	PV_NO_VALUE      /* Does not contain any value (pl=0) */
-} PV_TYPE;
-
-/* Bit 7 of type field */
-#define PV_BIG_ENDIAN    0x80 
-#define PV_LITTLE_ENDIAN 0x00
-#define PV_MASK          0x7f   /* To mask away endian bit */
-
-#define PV_PUT 0
-#define PV_GET 1
-
-typedef union {
-	char   *c;
-	__u32   i;
-	__u32 *ip;
-} irda_pv_t;
-
-typedef struct {
-	__u8 pi;
-	__u8 pl;
-	irda_pv_t pv;
-} irda_param_t;
-
-typedef int (*PI_HANDLER)(void *self, irda_param_t *param, int get);
-typedef int (*PV_HANDLER)(void *self, __u8 *buf, int len, __u8 pi,
-			  PV_TYPE type, PI_HANDLER func);
-
-typedef struct {
-	const PI_HANDLER func;  /* Handler for this parameter identifier */
-	PV_TYPE    type;  /* Data type for this parameter */
-} pi_minor_info_t;
-
-typedef struct {
-	const pi_minor_info_t *pi_minor_call_table;
-	int len;
-} pi_major_info_t;
-
-typedef struct {
-	const pi_major_info_t *tables;
-	int              len;
-	__u8             pi_mask;
-	int              pi_major_offset;
-} pi_param_info_t;
-
-int irda_param_pack(__u8 *buf, char *fmt, ...);
-
-int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len, 
-		      pi_param_info_t *info);
-int irda_param_extract_all(void *self, __u8 *buf, int len, 
-			   pi_param_info_t *info);
-
-#define irda_param_insert_byte(buf,pi,pv) irda_param_pack(buf,"bbb",pi,1,pv)
-
-#endif /* IRDA_PARAMS_H */
-
diff --git a/include/net/irda/qos.h b/include/net/irda/qos.h
deleted file mode 100644
index 05a5a249956f..000000000000
--- a/include/net/irda/qos.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      qos.h
- * Version:       1.0
- * Description:   Quality of Service definitions
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Fri Sep 19 23:21:09 1997
- * Modified at:   Thu Dec  2 13:51:54 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- * 
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- * 
- *     You should have received a copy of the GNU General Public License 
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *     
- ********************************************************************/
-
-#ifndef IRDA_QOS_H
-#define IRDA_QOS_H
-
-#include <linux/skbuff.h>
-
-#include <net/irda/parameters.h>
-
-#define PI_BAUD_RATE     0x01
-#define PI_MAX_TURN_TIME 0x82
-#define PI_DATA_SIZE     0x83
-#define PI_WINDOW_SIZE   0x84
-#define PI_ADD_BOFS      0x85
-#define PI_MIN_TURN_TIME 0x86
-#define PI_LINK_DISC     0x08
-
-#define IR_115200_MAX 0x3f
-
-/* Baud rates (first byte) */
-#define IR_2400     0x01
-#define IR_9600     0x02
-#define IR_19200    0x04
-#define IR_38400    0x08
-#define IR_57600    0x10
-#define IR_115200   0x20
-#define IR_576000   0x40
-#define IR_1152000  0x80
-
-/* Baud rates (second byte) */
-#define IR_4000000  0x01
-#define IR_16000000 0x02
-
-/* Quality of Service information */
-typedef struct {
-	__u32 value;
-	__u16 bits; /* LSB is first byte, MSB is second byte */
-} qos_value_t;
-
-struct qos_info {
-	magic_t magic;
-
-	qos_value_t baud_rate;       /* IR_11520O | ... */
-	qos_value_t max_turn_time;
-	qos_value_t data_size;
-	qos_value_t window_size;
-	qos_value_t additional_bofs;
-	qos_value_t min_turn_time;
-	qos_value_t link_disc_time;
-	
-	qos_value_t power;
-};
-
-extern int sysctl_max_baud_rate;
-extern int sysctl_max_inactive_time;
-
-void irda_init_max_qos_capabilies(struct qos_info *qos);
-void irda_qos_compute_intersection(struct qos_info *, struct qos_info *);
-
-__u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time);
-
-void irda_qos_bits_to_value(struct qos_info *qos);
-
-/* So simple, how could we not inline those two ?
- * Note : one byte is 10 bits if you include start and stop bits
- * Jean II */
-#define irlap_min_turn_time_in_bytes(speed, min_turn_time) (	\
-	speed * min_turn_time / 10000000			\
-)
-#define irlap_xbofs_in_usec(speed, xbofs) (			\
-	xbofs * 10000000 / speed				\
-)
-
-#endif
-
diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h
deleted file mode 100644
index d784f242cf7b..000000000000
--- a/include/net/irda/timer.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      timer.h
- * Version:       
- * Description:   
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sat Aug 16 00:59:29 1997
- * Modified at:   Thu Oct  7 12:25:24 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1997, 1998-1999 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef TIMER_H
-#define TIMER_H
-
-#include <linux/timer.h>
-#include <linux/jiffies.h>
-
-#include <asm/param.h>  /* for HZ */
-
-#include <net/irda/irda.h>
-
-/* A few forward declarations (to make compiler happy) */
-struct irlmp_cb;
-struct irlap_cb;
-struct lsap_cb;
-struct lap_cb;
-
-/* 
- *  Timeout definitions, some defined in IrLAP 6.13.5 - p. 92
- */
-#define POLL_TIMEOUT        (450*HZ/1000)    /* Must never exceed 500 ms */
-#define FINAL_TIMEOUT       (500*HZ/1000)    /* Must never exceed 500 ms */
-
-/* 
- *  Normally twice of p-timer. Note 3, IrLAP 6.3.11.2 - p. 60 suggests
- *  at least twice duration of the P-timer.
- */
-#define WD_TIMEOUT          (POLL_TIMEOUT*2)
-
-#define MEDIABUSY_TIMEOUT   (500*HZ/1000)    /* 500 msec */
-#define SMALLBUSY_TIMEOUT   (100*HZ/1000)    /* 100 msec - IrLAP 6.13.4 */
-
-/*
- *  Slot timer must never exceed 85 ms, and must always be at least 25 ms, 
- *  suggested to  75-85 msec by IrDA lite. This doesn't work with a lot of
- *  devices, and other stackes uses a lot more, so it's best we do it as well
- *  (Note : this is the default value and sysctl overrides it - Jean II)
- */
-#define SLOT_TIMEOUT            (90*HZ/1000)
-
-/* 
- *  The latest discovery frame (XID) is longer due to the extra discovery
- *  information (hints, device name...). This is its extra length.
- *  We use that when setting the query timeout. Jean II
- */
-#define XIDEXTRA_TIMEOUT        (34*HZ/1000)  /* 34 msec */
-
-#define WATCHDOG_TIMEOUT        (20*HZ)       /* 20 sec */
-
-typedef void (*TIMER_CALLBACK)(void *);
-
-static inline void irda_start_timer(struct timer_list *ptimer, int timeout, 
-				    void* data, TIMER_CALLBACK callback)
-{
-	ptimer->function = (void (*)(unsigned long)) callback;
-	ptimer->data = (unsigned long) data;
-	
-	/* Set new value for timer (update or add timer).
-	 * We use mod_timer() because it's more efficient and also
-	 * safer with respect to race conditions - Jean II */
-	mod_timer(ptimer, jiffies + timeout);
-}
-
-
-void irlap_start_slot_timer(struct irlap_cb *self, int timeout);
-void irlap_start_query_timer(struct irlap_cb *self, int S, int s);
-void irlap_start_final_timer(struct irlap_cb *self, int timeout);
-void irlap_start_wd_timer(struct irlap_cb *self, int timeout);
-void irlap_start_backoff_timer(struct irlap_cb *self, int timeout);
-
-void irlap_start_mbusy_timer(struct irlap_cb *self, int timeout);
-void irlap_stop_mbusy_timer(struct irlap_cb *);
-
-void irlmp_start_watchdog_timer(struct lsap_cb *, int timeout);
-void irlmp_start_discovery_timer(struct irlmp_cb *, int timeout);
-void irlmp_start_idle_timer(struct lap_cb *, int timeout);
-void irlmp_stop_idle_timer(struct lap_cb *self);
-
-#endif
-
diff --git a/include/net/irda/wrapper.h b/include/net/irda/wrapper.h
deleted file mode 100644
index eef53ebe3d76..000000000000
--- a/include/net/irda/wrapper.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*********************************************************************
- *                
- * Filename:      wrapper.h
- * Version:       1.2
- * Description:   IrDA SIR async wrapper layer
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Aug  4 20:40:53 1997
- * Modified at:   Tue Jan 11 12:37:29 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * 
- *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>, 
- *     All Rights Reserved.
- *     
- *     This program is free software; you can redistribute it and/or 
- *     modify it under the terms of the GNU General Public License as 
- *     published by the Free Software Foundation; either version 2 of 
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is 
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#ifndef WRAPPER_H
-#define WRAPPER_H
-
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-
-#include <net/irda/irda_device.h>	/* iobuff_t */
-
-#define BOF  0xc0 /* Beginning of frame */
-#define XBOF 0xff
-#define EOF  0xc1 /* End of frame */
-#define CE   0x7d /* Control escape */
-
-#define STA BOF  /* Start flag */
-#define STO EOF  /* End flag */
-
-#define IRDA_TRANS 0x20    /* Asynchronous transparency modifier */       
-
-/* States for receiving a frame in async mode */
-enum {
-	OUTSIDE_FRAME, 
-	BEGIN_FRAME, 
-	LINK_ESCAPE, 
-	INSIDE_FRAME
-};
-
-/* Proto definitions */
-int async_wrap_skb(struct sk_buff *skb, __u8 *tx_buff, int buffsize);
-void async_unwrap_char(struct net_device *dev, struct net_device_stats *stats,
-		       iobuff_t *buf, __u8 byte);
-
-#endif
-- 
cgit v1.2.3


From 21acf63013ed3d6fce3176cc34b74064052a31b4 Mon Sep 17 00:00:00 2001
From: Samuel Mendoza-Jonas <sam@mendozajonas.com>
Date: Mon, 28 Aug 2017 16:18:42 +1000
Subject: net/ncsi: Configure VLAN tag filter

Make use of the ndo_vlan_rx_{add,kill}_vid callbacks to have the NCSI
stack process new VLAN tags and configure the channel VLAN filter
appropriately.
Several VLAN tags can be set and a "Set VLAN Filter" packet must be sent
for each one, meaning the ncsi_dev_state_config_svf state must be
repeated. An internal list of VLAN tags is maintained, and compared
against the current channel's ncsi_channel_filter in order to keep track
within the state. VLAN filters are removed in a similar manner, with the
introduction of the ncsi_dev_state_config_clear_vids state. The maximum
number of VLAN tag filters is determined by the "Get Capabilities"
response from the channel.

Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ncsi.h     |   2 +
 net/ncsi/internal.h    |  11 ++
 net/ncsi/ncsi-manage.c | 308 ++++++++++++++++++++++++++++++++++++++++++++++++-
 net/ncsi/ncsi-rsp.c    |   9 +-
 4 files changed, 326 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ncsi.h b/include/net/ncsi.h
index 68680baac0fd..1f96af46df49 100644
--- a/include/net/ncsi.h
+++ b/include/net/ncsi.h
@@ -28,6 +28,8 @@ struct ncsi_dev {
 };
 
 #ifdef CONFIG_NET_NCSI
+int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid);
+int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid);
 struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 				   void (*notifier)(struct ncsi_dev *nd));
 int ncsi_start_dev(struct ncsi_dev *nd);
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 1308a56f2591..af3d636534ef 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -180,6 +180,7 @@ struct ncsi_channel {
 #define NCSI_CHANNEL_INACTIVE		1
 #define NCSI_CHANNEL_ACTIVE		2
 #define NCSI_CHANNEL_INVISIBLE		3
+	bool                        reconfigure_needed;
 	spinlock_t                  lock;	/* Protect filters etc */
 	struct ncsi_package         *package;
 	struct ncsi_channel_version version;
@@ -235,6 +236,9 @@ enum {
 	ncsi_dev_state_probe_dp,
 	ncsi_dev_state_config_sp	= 0x0301,
 	ncsi_dev_state_config_cis,
+	ncsi_dev_state_config_clear_vids,
+	ncsi_dev_state_config_svf,
+	ncsi_dev_state_config_ev,
 	ncsi_dev_state_config_sma,
 	ncsi_dev_state_config_ebf,
 #if IS_ENABLED(CONFIG_IPV6)
@@ -253,6 +257,12 @@ enum {
 	ncsi_dev_state_suspend_done
 };
 
+struct vlan_vid {
+	struct list_head list;
+	__be16 proto;
+	u16 vid;
+};
+
 struct ncsi_dev_priv {
 	struct ncsi_dev     ndev;            /* Associated NCSI device     */
 	unsigned int        flags;           /* NCSI device flags          */
@@ -276,6 +286,7 @@ struct ncsi_dev_priv {
 	struct work_struct  work;            /* For channel management     */
 	struct packet_type  ptype;           /* NCSI packet Rx handler     */
 	struct list_head    node;            /* Form NCSI device list      */
+	struct list_head    vlan_vids;       /* List of active VLAN IDs */
 };
 
 struct ncsi_cmd_arg {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index a3bd5fa8ad09..11904b3b702d 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -38,6 +38,25 @@ static inline int ncsi_filter_size(int table)
 	return sizes[table];
 }
 
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+{
+	struct ncsi_channel_filter *ncf;
+	int size;
+
+	ncf = nc->filters[table];
+	if (!ncf)
+		return NULL;
+
+	size = ncsi_filter_size(table);
+	if (size < 0)
+		return NULL;
+
+	return ncf->data + size * index;
+}
+
+/* Find the first active filter in a filter table that matches the given
+ * data parameter. If data is NULL, this returns the first active filter.
+ */
 int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
 {
 	struct ncsi_channel_filter *ncf;
@@ -58,7 +77,7 @@ int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
 	index = -1;
 	while ((index = find_next_bit(bitmap, ncf->total, index + 1))
 	       < ncf->total) {
-		if (!memcmp(ncf->data + size * index, data, size)) {
+		if (!data || !memcmp(ncf->data + size * index, data, size)) {
 			spin_unlock_irqrestore(&nc->lock, flags);
 			return index;
 		}
@@ -639,6 +658,95 @@ error:
 	nd->state = ncsi_dev_state_functional;
 }
 
+/* Check the VLAN filter bitmap for a set filter, and construct a
+ * "Set VLAN Filter - Disable" packet if found.
+ */
+static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
+			 struct ncsi_cmd_arg *nca)
+{
+	int index;
+	u32 *data;
+	u16 vid;
+
+	index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, NULL);
+	if (index < 0) {
+		/* Filter table empty */
+		return -1;
+	}
+
+	data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
+	if (!data) {
+		netdev_err(ndp->ndev.dev,
+			   "ncsi: failed to retrieve filter %d\n", index);
+		/* Set the VLAN id to 0 - this will still disable the entry in
+		 * the filter table, but we won't know what it was.
+		 */
+		vid = 0;
+	} else {
+		vid = *(u16 *)data;
+	}
+
+	netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+		      "ncsi: removed vlan tag %u at index %d\n",
+		      vid, index + 1);
+	ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
+
+	nca->type = NCSI_PKT_CMD_SVF;
+	nca->words[1] = vid;
+	/* HW filter index starts at 1 */
+	nca->bytes[6] = index + 1;
+	nca->bytes[7] = 0x00;
+	return 0;
+}
+
+/* Find an outstanding VLAN tag and constuct a "Set VLAN Filter - Enable"
+ * packet.
+ */
+static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
+		       struct ncsi_cmd_arg *nca)
+{
+	struct vlan_vid *vlan = NULL;
+	int index = 0;
+
+	list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
+		index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
+		if (index < 0) {
+			/* New tag to add */
+			netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+				      "ncsi: new vlan id to set: %u\n",
+				      vlan->vid);
+			break;
+		}
+		netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+			      "vid %u already at filter pos %d\n",
+			      vlan->vid, index);
+	}
+
+	if (!vlan || index >= 0) {
+		netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+			      "no vlan ids left to set\n");
+		return -1;
+	}
+
+	index = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
+	if (index < 0) {
+		netdev_err(ndp->ndev.dev,
+			   "Failed to add new VLAN tag, error %d\n", index);
+		return -1;
+	}
+
+	netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+		      "ncsi: set vid %u in packet, index %u\n",
+		      vlan->vid, index + 1);
+	nca->type = NCSI_PKT_CMD_SVF;
+	nca->words[1] = vlan->vid;
+	/* HW filter index starts at 1 */
+	nca->bytes[6] = index + 1;
+	nca->bytes[7] = 0x01;
+
+	return 0;
+}
+
 static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
@@ -683,8 +791,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 		if (ret)
 			goto error;
 
-		nd->state = ncsi_dev_state_config_sma;
+		nd->state = ncsi_dev_state_config_clear_vids;
 		break;
+	case ncsi_dev_state_config_clear_vids:
+	case ncsi_dev_state_config_svf:
+	case ncsi_dev_state_config_ev:
 	case ncsi_dev_state_config_sma:
 	case ncsi_dev_state_config_ebf:
 #if IS_ENABLED(CONFIG_IPV6)
@@ -699,11 +810,40 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 		nca.package = np->id;
 		nca.channel = nc->id;
 
+		/* Clear any active filters on the channel before setting */
+		if (nd->state == ncsi_dev_state_config_clear_vids) {
+			ret = clear_one_vid(ndp, nc, &nca);
+			if (ret) {
+				nd->state = ncsi_dev_state_config_svf;
+				schedule_work(&ndp->work);
+				break;
+			}
+			/* Repeat */
+			nd->state = ncsi_dev_state_config_clear_vids;
+		/* Add known VLAN tags to the filter */
+		} else if (nd->state == ncsi_dev_state_config_svf) {
+			ret = set_one_vid(ndp, nc, &nca);
+			if (ret) {
+				nd->state = ncsi_dev_state_config_ev;
+				schedule_work(&ndp->work);
+				break;
+			}
+			/* Repeat */
+			nd->state = ncsi_dev_state_config_svf;
+		/* Enable/Disable the VLAN filter */
+		} else if (nd->state == ncsi_dev_state_config_ev) {
+			if (list_empty(&ndp->vlan_vids)) {
+				nca.type = NCSI_PKT_CMD_DV;
+			} else {
+				nca.type = NCSI_PKT_CMD_EV;
+				nca.bytes[3] = NCSI_CAP_VLAN_NO;
+			}
+			nd->state = ncsi_dev_state_config_sma;
+		} else if (nd->state == ncsi_dev_state_config_sma) {
 		/* Use first entry in unicast filter table. Note that
 		 * the MAC filter table starts from entry 1 instead of
 		 * 0.
 		 */
-		if (nd->state == ncsi_dev_state_config_sma) {
 			nca.type = NCSI_PKT_CMD_SMA;
 			for (index = 0; index < 6; index++)
 				nca.bytes[index] = dev->dev_addr[index];
@@ -751,6 +891,25 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 		break;
 	case ncsi_dev_state_config_done:
 		spin_lock_irqsave(&nc->lock, flags);
+		if (nc->reconfigure_needed) {
+			/* This channel's configuration has been updated
+			 * part-way during the config state - start the
+			 * channel configuration over
+			 */
+			nc->reconfigure_needed = false;
+			nc->state = NCSI_CHANNEL_INACTIVE;
+			spin_unlock_irqrestore(&nc->lock, flags);
+
+			spin_lock_irqsave(&ndp->lock, flags);
+			list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+			spin_unlock_irqrestore(&ndp->lock, flags);
+
+			netdev_printk(KERN_DEBUG, dev,
+				      "Dirty NCSI channel state reset\n");
+			ncsi_process_next_channel(ndp);
+			break;
+		}
+
 		if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
 			hot_nc = nc;
 			nc->state = NCSI_CHANNEL_ACTIVE;
@@ -1191,6 +1350,148 @@ static struct notifier_block ncsi_inet6addr_notifier = {
 };
 #endif /* CONFIG_IPV6 */
 
+static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
+{
+	struct ncsi_dev *nd = &ndp->ndev;
+	struct ncsi_channel *nc;
+	struct ncsi_package *np;
+	unsigned long flags;
+	unsigned int n = 0;
+
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			spin_lock_irqsave(&nc->lock, flags);
+
+			/* Channels may be busy, mark dirty instead of
+			 * kicking if;
+			 * a) not ACTIVE (configured)
+			 * b) in the channel_queue (to be configured)
+			 * c) it's ndev is in the config state
+			 */
+			if (nc->state != NCSI_CHANNEL_ACTIVE) {
+				if ((ndp->ndev.state & 0xff00) ==
+						ncsi_dev_state_config ||
+						!list_empty(&nc->link)) {
+					netdev_printk(KERN_DEBUG, nd->dev,
+						      "ncsi: channel %p marked dirty\n",
+						      nc);
+					nc->reconfigure_needed = true;
+				}
+				spin_unlock_irqrestore(&nc->lock, flags);
+				continue;
+			}
+
+			spin_unlock_irqrestore(&nc->lock, flags);
+
+			ncsi_stop_channel_monitor(nc);
+			spin_lock_irqsave(&nc->lock, flags);
+			nc->state = NCSI_CHANNEL_INACTIVE;
+			spin_unlock_irqrestore(&nc->lock, flags);
+
+			spin_lock_irqsave(&ndp->lock, flags);
+			list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+			spin_unlock_irqrestore(&ndp->lock, flags);
+
+			netdev_printk(KERN_DEBUG, nd->dev,
+				      "ncsi: kicked channel %p\n", nc);
+			n++;
+		}
+	}
+
+	return n;
+}
+
+int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct ncsi_channel_filter *ncf;
+	struct ncsi_dev_priv *ndp;
+	unsigned int n_vids = 0;
+	struct vlan_vid *vlan;
+	struct ncsi_dev *nd;
+	bool found = false;
+
+	if (vid == 0)
+		return 0;
+
+	nd = ncsi_find_dev(dev);
+	if (!nd) {
+		netdev_warn(dev, "ncsi: No net_device?\n");
+		return 0;
+	}
+
+	ndp = TO_NCSI_DEV_PRIV(nd);
+	ncf = ndp->hot_channel->filters[NCSI_FILTER_VLAN];
+
+	/* Add the VLAN id to our internal list */
+	list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
+		n_vids++;
+		if (vlan->vid == vid) {
+			netdev_printk(KERN_DEBUG, dev,
+				      "vid %u already registered\n", vid);
+			return 0;
+		}
+	}
+
+	if (n_vids >= ncf->total) {
+		netdev_info(dev,
+			    "NCSI Channel supports up to %u VLAN tags but %u are already set\n",
+			    ncf->total, n_vids);
+		return -EINVAL;
+	}
+
+	vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+	if (!vlan)
+		return -ENOMEM;
+
+	vlan->proto = proto;
+	vlan->vid = vid;
+	list_add_rcu(&vlan->list, &ndp->vlan_vids);
+
+	netdev_printk(KERN_DEBUG, dev, "Added new vid %u\n", vid);
+
+	found = ncsi_kick_channels(ndp) != 0;
+
+	return found ? ncsi_process_next_channel(ndp) : 0;
+}
+
+int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct vlan_vid *vlan, *tmp;
+	struct ncsi_dev_priv *ndp;
+	struct ncsi_dev *nd;
+	bool found = false;
+
+	if (vid == 0)
+		return 0;
+
+	nd = ncsi_find_dev(dev);
+	if (!nd) {
+		netdev_warn(dev, "ncsi: no net_device?\n");
+		return 0;
+	}
+
+	ndp = TO_NCSI_DEV_PRIV(nd);
+
+	/* Remove the VLAN id from our internal list */
+	list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
+		if (vlan->vid == vid) {
+			netdev_printk(KERN_DEBUG, dev,
+				      "vid %u found, removing\n", vid);
+			list_del_rcu(&vlan->list);
+			found = true;
+			kfree(vlan);
+		}
+
+	if (!found) {
+		netdev_err(dev, "ncsi: vid %u wasn't registered!\n", vid);
+		return -EINVAL;
+	}
+
+	found = ncsi_kick_channels(ndp) != 0;
+
+	return found ? ncsi_process_next_channel(ndp) : 0;
+}
+
 struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 				   void (*handler)(struct ncsi_dev *ndev))
 {
@@ -1215,6 +1516,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 	nd->handler = handler;
 	ndp->pending_req_num = 0;
 	INIT_LIST_HEAD(&ndp->channel_queue);
+	INIT_LIST_HEAD(&ndp->vlan_vids);
 	INIT_WORK(&ndp->work, ncsi_dev_work);
 
 	/* Initialize private NCSI device */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index c1a191d790e2..265b9a892d41 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -694,7 +694,14 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
 
 		ncf->index = i;
 		ncf->total = cnt;
-		ncf->bitmap = 0x0ul;
+		if (i == NCSI_FILTER_VLAN) {
+			/* Set VLAN filters active so they are cleared in
+			 * first configuration state
+			 */
+			ncf->bitmap = U64_MAX;
+		} else {
+			ncf->bitmap = 0x0ul;
+		}
 		nc->filters[i] = ncf;
 	}
 
-- 
cgit v1.2.3


From e833251ad813168253fef9915aaf6a8c883337b0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 29 Aug 2017 10:18:56 +0100
Subject: rxrpc: Add notification of end-of-Tx phase

Add a callback to rxrpc_kernel_send_data() so that a kernel service can get
a notification that the AF_RXRPC call has transitioned out the Tx phase and
is now waiting for a reply or a final ACK.

This is called from AF_RXRPC with the call state lock held so the
notification is guaranteed to come before any reply is passed back.

Further, modify the AFS filesystem to make use of this so that we don't have
to change the afs_call state before sending the last bit of data.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/networking/rxrpc.txt | 12 +++++++++-
 fs/afs/rxrpc.c                     | 46 +++++++++++++++++++++++++++++---------
 include/net/af_rxrpc.h             |  5 ++++-
 net/rxrpc/sendmsg.c                | 34 ++++++++++++++++++++++------
 4 files changed, 77 insertions(+), 20 deletions(-)

(limited to 'include/net')

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 8c70ba5dee4d..92a3c3bd5ac3 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -818,10 +818,15 @@ The kernel interface functions are as follows:
 
  (*) Send data through a call.
 
+	typedef void (*rxrpc_notify_end_tx_t)(struct sock *sk,
+					      unsigned long user_call_ID,
+					      struct sk_buff *skb);
+
 	int rxrpc_kernel_send_data(struct socket *sock,
 				   struct rxrpc_call *call,
 				   struct msghdr *msg,
-				   size_t len);
+				   size_t len,
+				   rxrpc_notify_end_tx_t notify_end_rx);
 
      This is used to supply either the request part of a client call or the
      reply part of a server call.  msg.msg_iovlen and msg.msg_iov specify the
@@ -832,6 +837,11 @@ The kernel interface functions are as follows:
      The msg must not specify a destination address, control data or any flags
      other than MSG_MORE.  len is the total amount of data to transmit.
 
+     notify_end_rx can be NULL or it can be used to specify a function to be
+     called when the call changes state to end the Tx phase.  This function is
+     called with the call-state spinlock held to prevent any reply or final ACK
+     from being delivered first.
+
  (*) Receive data from a call.
 
 	int rxrpc_kernel_recv_data(struct socket *sock,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 10743043d431..0bf191f0dbaf 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -291,6 +291,19 @@ static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
 	iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
 }
 
+/*
+ * Advance the AFS call state when the RxRPC call ends the transmit phase.
+ */
+static void afs_notify_end_request_tx(struct sock *sock,
+				      struct rxrpc_call *rxcall,
+				      unsigned long call_user_ID)
+{
+	struct afs_call *call = (struct afs_call *)call_user_ID;
+
+	if (call->state == AFS_CALL_REQUESTING)
+		call->state = AFS_CALL_AWAIT_REPLY;
+}
+
 /*
  * attach the data from a bunch of pages on an inode to a call
  */
@@ -310,14 +323,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 		bytes = msg->msg_iter.count;
 		nr = msg->msg_iter.nr_segs;
 
-		/* Have to change the state *before* sending the last
-		 * packet as RxRPC might give us the reply before it
-		 * returns from sending the request.
-		 */
-		if (first + nr - 1 >= last)
-			call->state = AFS_CALL_AWAIT_REPLY;
-		ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
-					     msg, bytes);
+		ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg,
+					     bytes, afs_notify_end_request_tx);
 		for (loop = 0; loop < nr; loop++)
 			put_page(bv[loop].bv_page);
 		if (ret < 0)
@@ -409,7 +416,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	if (!call->send_pages)
 		call->state = AFS_CALL_AWAIT_REPLY;
 	ret = rxrpc_kernel_send_data(afs_socket, rxcall,
-				     &msg, call->request_size);
+				     &msg, call->request_size,
+				     afs_notify_end_request_tx);
 	if (ret < 0)
 		goto error_do_abort;
 
@@ -740,6 +748,20 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
 	return call->type->deliver(call);
 }
 
+/*
+ * Advance the AFS call state when an RxRPC service call ends the transmit
+ * phase.
+ */
+static void afs_notify_end_reply_tx(struct sock *sock,
+				    struct rxrpc_call *rxcall,
+				    unsigned long call_user_ID)
+{
+	struct afs_call *call = (struct afs_call *)call_user_ID;
+
+	if (call->state == AFS_CALL_REPLYING)
+		call->state = AFS_CALL_AWAIT_ACK;
+}
+
 /*
  * send an empty reply
  */
@@ -759,7 +781,8 @@ void afs_send_empty_reply(struct afs_call *call)
 	msg.msg_flags		= 0;
 
 	call->state = AFS_CALL_AWAIT_ACK;
-	switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) {
+	switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0,
+				       afs_notify_end_reply_tx)) {
 	case 0:
 		_leave(" [replied]");
 		return;
@@ -797,7 +820,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 	msg.msg_flags		= 0;
 
 	call->state = AFS_CALL_AWAIT_ACK;
-	n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len);
+	n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len,
+				   afs_notify_end_reply_tx);
 	if (n >= 0) {
 		/* Success */
 		_leave(" [replied]");
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index c172709787af..07a47ee6f783 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -21,6 +21,8 @@ struct rxrpc_call;
 
 typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *,
 				  unsigned long);
+typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *,
+				      unsigned long);
 typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *,
 					unsigned long);
 typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long);
@@ -37,7 +39,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
 					   gfp_t,
 					   rxrpc_notify_rx_t);
 int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
-			   struct msghdr *, size_t);
+			   struct msghdr *, size_t,
+			   rxrpc_notify_end_tx_t);
 int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
 			   void *, size_t, size_t *, bool, u32 *);
 bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index bc7f0241d92b..344fdce89823 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -100,12 +100,24 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
 	spin_unlock_bh(&call->lock);
 }
 
+/*
+ * Notify the owner of the call that the transmit phase is ended and the last
+ * packet has been queued.
+ */
+static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call,
+				rxrpc_notify_end_tx_t notify_end_tx)
+{
+	if (notify_end_tx)
+		notify_end_tx(&rx->sk, call, call->user_call_ID);
+}
+
 /*
  * Queue a DATA packet for transmission, set the resend timeout and send the
  * packet immediately
  */
-static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
-			       bool last)
+static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+			       struct sk_buff *skb, bool last,
+			       rxrpc_notify_end_tx_t notify_end_tx)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	rxrpc_seq_t seq = sp->hdr.seq;
@@ -141,6 +153,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
 		switch (call->state) {
 		case RXRPC_CALL_CLIENT_SEND_REQUEST:
 			call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+			rxrpc_notify_end_tx(rx, call, notify_end_tx);
 			break;
 		case RXRPC_CALL_SERVER_ACK_REQUEST:
 			call->state = RXRPC_CALL_SERVER_SEND_REPLY;
@@ -153,6 +166,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
 				break;
 		case RXRPC_CALL_SERVER_SEND_REPLY:
 			call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+			rxrpc_notify_end_tx(rx, call, notify_end_tx);
 			break;
 		default:
 			break;
@@ -189,7 +203,8 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
  */
 static int rxrpc_send_data(struct rxrpc_sock *rx,
 			   struct rxrpc_call *call,
-			   struct msghdr *msg, size_t len)
+			   struct msghdr *msg, size_t len,
+			   rxrpc_notify_end_tx_t notify_end_tx)
 {
 	struct rxrpc_skb_priv *sp;
 	struct sk_buff *skb;
@@ -350,7 +365,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 			if (ret < 0)
 				goto out;
 
-			rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
+			rxrpc_queue_packet(rx, call, skb,
+					   !msg_data_left(msg) && !more,
+					   notify_end_tx);
 			skb = NULL;
 		}
 	} while (msg_data_left(msg) > 0);
@@ -611,7 +628,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 		/* Reply phase not begun or not complete for service call. */
 		ret = -EPROTO;
 	} else {
-		ret = rxrpc_send_data(rx, call, msg, len);
+		ret = rxrpc_send_data(rx, call, msg, len, NULL);
 	}
 
 	mutex_unlock(&call->user_mutex);
@@ -631,6 +648,7 @@ error_release_sock:
  * @call: The call to send data through
  * @msg: The data to send
  * @len: The amount of data to send
+ * @notify_end_tx: Notification that the last packet is queued.
  *
  * Allow a kernel service to send data on a call.  The call must be in an state
  * appropriate to sending data.  No control data should be supplied in @msg,
@@ -638,7 +656,8 @@ error_release_sock:
  * more data to come, otherwise this data will end the transmission phase.
  */
 int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
-			   struct msghdr *msg, size_t len)
+			   struct msghdr *msg, size_t len,
+			   rxrpc_notify_end_tx_t notify_end_tx)
 {
 	int ret;
 
@@ -656,7 +675,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
 	case RXRPC_CALL_CLIENT_SEND_REQUEST:
 	case RXRPC_CALL_SERVER_ACK_REQUEST:
 	case RXRPC_CALL_SERVER_SEND_REPLY:
-		ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+		ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+				      notify_end_tx);
 		break;
 	case RXRPC_CALL_COMPLETE:
 		read_lock_bh(&call->state_lock);
-- 
cgit v1.2.3


From c038a58ccfd6704d4d7d60ed3d6a0fca13cf13a4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 29 Aug 2017 10:19:01 +0100
Subject: rxrpc: Allow failed client calls to be retried

Allow a client call that failed on network error to be retried, provided
that the Tx queue still holds DATA packet 1.  This allows an operation to
be submitted to another server or another address for the same server
without having to repackage and re-encrypt the data so far processed.

Two new functions are provided:

 (1) rxrpc_kernel_check_call() - This is used to find out the completion
     state of a call to guess whether it can be retried and whether it
     should be retried.

 (2) rxrpc_kernel_retry_call() - Disconnect the call from its current
     connection, reset the state and submit it as a new client call to a
     new address.  The new address need not match the previous address.

A call may be retried even if all the data hasn't been loaded into it yet;
a partially constructed will be retained at the same point it was at when
an error condition was detected.  msg_data_left() can be used to find out
how much data was packaged before the error occurred.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/networking/rxrpc.txt |  45 ++++++++++++++++
 include/net/af_rxrpc.h             |  16 ++++++
 net/rxrpc/af_rxrpc.c               |  69 +++++++++++++++++++++++++
 net/rxrpc/ar-internal.h            |  19 +++----
 net/rxrpc/call_object.c            | 102 +++++++++++++++++++++++++++++++++++--
 net/rxrpc/conn_client.c            |  17 +++++--
 net/rxrpc/sendmsg.c                |  24 +++++----
 7 files changed, 261 insertions(+), 31 deletions(-)

(limited to 'include/net')

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 92a3c3bd5ac3..810620153a44 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -975,6 +975,51 @@ The kernel interface functions are as follows:
      size should be set when the call is begun.  tx_total_len may not be less
      than zero.
 
+ (*) Check to see the completion state of a call so that the caller can assess
+     whether it needs to be retried.
+
+	enum rxrpc_call_completion {
+		RXRPC_CALL_SUCCEEDED,
+		RXRPC_CALL_REMOTELY_ABORTED,
+		RXRPC_CALL_LOCALLY_ABORTED,
+		RXRPC_CALL_LOCAL_ERROR,
+		RXRPC_CALL_NETWORK_ERROR,
+	};
+
+	int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
+				    enum rxrpc_call_completion *_compl,
+				    u32 *_abort_code);
+
+     On return, -EINPROGRESS will be returned if the call is still ongoing; if
+     it is finished, *_compl will be set to indicate the manner of completion,
+     *_abort_code will be set to any abort code that occurred.  0 will be
+     returned on a successful completion, -ECONNABORTED will be returned if the
+     client failed due to a remote abort and anything else will return an
+     appropriate error code.
+
+     The caller should look at this information to decide if it's worth
+     retrying the call.
+
+ (*) Retry a client call.
+
+	int rxrpc_kernel_retry_call(struct socket *sock,
+				    struct rxrpc_call *call,
+				    struct sockaddr_rxrpc *srx,
+				    struct key *key);
+
+     This attempts to partially reinitialise a call and submit it again whilst
+     reusing the original call's Tx queue to avoid the need to repackage and
+     re-encrypt the data to be sent.  call indicates the call to retry, srx the
+     new address to send it to and key the encryption key to use for signing or
+     encrypting the packets.
+
+     For this to work, the first Tx data packet must still be in the transmit
+     queue, and currently this is only permitted for local and network errors
+     and the call must not have been aborted.  Any partially constructed Tx
+     packet is left as is and can continue being filled afterwards.
+
+     It returns 0 if the call was requeued and an error otherwise.
+
 
 =======================
 CONFIGURABLE PARAMETERS
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 07a47ee6f783..3ac79150291f 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -19,6 +19,18 @@ struct sock;
 struct socket;
 struct rxrpc_call;
 
+/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+	RXRPC_CALL_SUCCEEDED,		/* - Normal termination */
+	RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
+	RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
+	RXRPC_CALL_LOCAL_ERROR,		/* - call failed due to local error */
+	RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
+	NR__RXRPC_CALL_COMPLETIONS
+};
+
 typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *,
 				  unsigned long);
 typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *,
@@ -51,5 +63,9 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
 int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
 			       rxrpc_user_attach_call_t, unsigned long, gfp_t);
 void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
+int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
+			    struct sockaddr_rxrpc *, struct key *);
+int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
+			    enum rxrpc_call_completion *, u32 *);
 
 #endif /* _NET_RXRPC_H */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 31e97f714ca9..fb17552fd292 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -336,6 +336,75 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
 }
 EXPORT_SYMBOL(rxrpc_kernel_end_call);
 
+/**
+ * rxrpc_kernel_check_call - Check a call's state
+ * @sock: The socket the call is on
+ * @call: The call to check
+ * @_compl: Where to store the completion state
+ * @_abort_code: Where to store any abort code
+ *
+ * Allow a kernel service to query the state of a call and find out the manner
+ * of its termination if it has completed.  Returns -EINPROGRESS if the call is
+ * still going, 0 if the call finished successfully, -ECONNABORTED if the call
+ * was aborted and an appropriate error if the call failed in some other way.
+ */
+int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
+			    enum rxrpc_call_completion *_compl, u32 *_abort_code)
+{
+	if (call->state != RXRPC_CALL_COMPLETE)
+		return -EINPROGRESS;
+	smp_rmb();
+	*_compl = call->completion;
+	*_abort_code = call->abort_code;
+	return call->error;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_call);
+
+/**
+ * rxrpc_kernel_retry_call - Allow a kernel service to retry a call
+ * @sock: The socket the call is on
+ * @call: The call to retry
+ * @srx: The address of the peer to contact
+ * @key: The security context to use (defaults to socket setting)
+ *
+ * Allow a kernel service to try resending a client call that failed due to a
+ * network error to a new address.  The Tx queue is maintained intact, thereby
+ * relieving the need to re-encrypt any request data that has already been
+ * buffered.
+ */
+int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call,
+			    struct sockaddr_rxrpc *srx, struct key *key)
+{
+	struct rxrpc_conn_parameters cp;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	int ret;
+
+	_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+
+	if (!key)
+		key = rx->key;
+	if (key && !key->payload.data[0])
+		key = NULL; /* a no-security key */
+
+	memset(&cp, 0, sizeof(cp));
+	cp.local		= rx->local;
+	cp.key			= key;
+	cp.security_level	= 0;
+	cp.exclusive		= false;
+	cp.service_id		= srx->srx_service;
+
+	mutex_lock(&call->user_mutex);
+
+	ret = rxrpc_prepare_call_for_retry(rx, call);
+	if (ret == 0)
+		ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL);
+
+	mutex_unlock(&call->user_mutex);
+	_leave(" = %d", ret);
+	return ret;
+}
+EXPORT_SYMBOL(rxrpc_kernel_retry_call);
+
 /**
  * rxrpc_kernel_new_call_notification - Get notifications of new calls
  * @sock: The socket to intercept received messages on
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 227e24794055..ea5600b747cc 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -445,6 +445,7 @@ enum rxrpc_call_flag {
 	RXRPC_CALL_EXPOSED,		/* The call was exposed to the world */
 	RXRPC_CALL_RX_LAST,		/* Received the last packet (at rxtx_top) */
 	RXRPC_CALL_TX_LAST,		/* Last packet in Tx buffer (at rxtx_top) */
+	RXRPC_CALL_TX_LASTQ,		/* Last packet has been queued */
 	RXRPC_CALL_SEND_PING,		/* A ping will need to be sent */
 	RXRPC_CALL_PINGING,		/* Ping in process */
 	RXRPC_CALL_RETRANS_TIMEOUT,	/* Retransmission due to timeout occurred */
@@ -481,18 +482,6 @@ enum rxrpc_call_state {
 	NR__RXRPC_CALL_STATES
 };
 
-/*
- * Call completion condition (state == RXRPC_CALL_COMPLETE).
- */
-enum rxrpc_call_completion {
-	RXRPC_CALL_SUCCEEDED,		/* - Normal termination */
-	RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
-	RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
-	RXRPC_CALL_LOCAL_ERROR,		/* - call failed due to local error */
-	RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
-	NR__RXRPC_CALL_COMPLETIONS
-};
-
 /*
  * Call Tx congestion management modes.
  */
@@ -687,9 +676,15 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
 					 struct rxrpc_conn_parameters *,
 					 struct sockaddr_rxrpc *,
 					 unsigned long, s64, gfp_t);
+int rxrpc_retry_client_call(struct rxrpc_sock *,
+			    struct rxrpc_call *,
+			    struct rxrpc_conn_parameters *,
+			    struct sockaddr_rxrpc *,
+			    gfp_t);
 void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
 			 struct sk_buff *);
 void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
+int rxrpc_prepare_call_for_retry(struct rxrpc_sock *, struct rxrpc_call *);
 void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
 bool __rxrpc_queue_call(struct rxrpc_call *);
 bool rxrpc_queue_call(struct rxrpc_call *);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index d7809a0620b4..fcdd6555a820 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -269,11 +269,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 	trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
 			 here, NULL);
 
-	spin_lock_bh(&call->conn->params.peer->lock);
-	hlist_add_head(&call->error_link,
-		       &call->conn->params.peer->error_targets);
-	spin_unlock_bh(&call->conn->params.peer->lock);
-
 	rxrpc_start_call_timer(call);
 
 	_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
@@ -303,6 +298,48 @@ error:
 	return ERR_PTR(ret);
 }
 
+/*
+ * Retry a call to a new address.  It is expected that the Tx queue of the call
+ * will contain data previously packaged for an old call.
+ */
+int rxrpc_retry_client_call(struct rxrpc_sock *rx,
+			    struct rxrpc_call *call,
+			    struct rxrpc_conn_parameters *cp,
+			    struct sockaddr_rxrpc *srx,
+			    gfp_t gfp)
+{
+	const void *here = __builtin_return_address(0);
+	int ret;
+
+	/* Set up or get a connection record and set the protocol parameters,
+	 * including channel number and call ID.
+	 */
+	ret = rxrpc_connect_call(call, cp, srx, gfp);
+	if (ret < 0)
+		goto error;
+
+	trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
+			 here, NULL);
+
+	rxrpc_start_call_timer(call);
+
+	_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+	if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+		rxrpc_queue_call(call);
+
+	_leave(" = 0");
+	return 0;
+
+error:
+	rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+				  RX_CALL_DEAD, ret);
+	trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
+			 here, ERR_PTR(ret));
+	_leave(" = %d", ret);
+	return ret;
+}
+
 /*
  * Set up an incoming call.  call->conn points to the connection.
  * This is called in BH context and isn't allowed to fail.
@@ -470,6 +507,61 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 	_leave("");
 }
 
+/*
+ * Prepare a kernel service call for retry.
+ */
+int rxrpc_prepare_call_for_retry(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+	const void *here = __builtin_return_address(0);
+	int i;
+	u8 last = 0;
+
+	_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
+
+	trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+			 here, (const void *)call->flags);
+
+	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+	ASSERTCMP(call->completion, !=, RXRPC_CALL_REMOTELY_ABORTED);
+	ASSERTCMP(call->completion, !=, RXRPC_CALL_LOCALLY_ABORTED);
+	ASSERT(list_empty(&call->recvmsg_link));
+
+	del_timer_sync(&call->timer);
+
+	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, call->conn);
+
+	if (call->conn)
+		rxrpc_disconnect_call(call);
+
+	if (rxrpc_is_service_call(call) ||
+	    !call->tx_phase ||
+	    call->tx_hard_ack != 0 ||
+	    call->rx_hard_ack != 0 ||
+	    call->rx_top != 0)
+		return -EINVAL;
+
+	call->state = RXRPC_CALL_UNINITIALISED;
+	call->completion = RXRPC_CALL_SUCCEEDED;
+	call->call_id = 0;
+	call->cid = 0;
+	call->cong_cwnd = 0;
+	call->cong_extra = 0;
+	call->cong_ssthresh = 0;
+	call->cong_mode = 0;
+	call->cong_dup_acks = 0;
+	call->cong_cumul_acks = 0;
+	call->acks_lowest_nak = 0;
+
+	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
+		last |= call->rxtx_annotations[i];
+		call->rxtx_annotations[i] &= RXRPC_TX_ANNO_LAST;
+		call->rxtx_annotations[i] |= RXRPC_TX_ANNO_RETRANS;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
 /*
  * release all the calls associated with a socket
  */
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index eb2157680399..5f9624bd311c 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -555,7 +555,10 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
 
 	write_lock_bh(&call->state_lock);
-	call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+	if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags))
+		call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+	else
+		call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
 	write_unlock_bh(&call->state_lock);
 
 	rxrpc_see_call(call);
@@ -688,15 +691,23 @@ int rxrpc_connect_call(struct rxrpc_call *call,
 
 	ret = rxrpc_get_client_conn(call, cp, srx, gfp);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	rxrpc_animate_client_conn(rxnet, call->conn);
 	rxrpc_activate_channels(call->conn);
 
 	ret = rxrpc_wait_for_channel(call, gfp);
-	if (ret < 0)
+	if (ret < 0) {
 		rxrpc_disconnect_client_call(call);
+		goto out;
+	}
+
+	spin_lock_bh(&call->conn->params.peer->lock);
+	hlist_add_head(&call->error_link,
+		       &call->conn->params.peer->error_targets);
+	spin_unlock_bh(&call->conn->params.peer->lock);
 
+out:
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 344fdce89823..9ea6f972767e 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -128,8 +128,10 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
 
 	ASSERTCMP(seq, ==, call->tx_top + 1);
 
-	if (last)
+	if (last) {
 		annotation |= RXRPC_TX_ANNO_LAST;
+		set_bit(RXRPC_CALL_TX_LASTQ, &call->flags);
+	}
 
 	/* We have to set the timestamp before queueing as the retransmit
 	 * algorithm can see the packet as soon as we queue it.
@@ -326,11 +328,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 				call->tx_total_len -= copy;
 		}
 
-		/* check for the far side aborting the call or a network error
-		 * occurring */
-		if (call->state == RXRPC_CALL_COMPLETE)
-			goto call_terminated;
-
 		/* add the packet to the send queue if it's now full */
 		if (sp->remain <= 0 ||
 		    (msg_data_left(msg) == 0 && !more)) {
@@ -370,6 +367,16 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 					   notify_end_tx);
 			skb = NULL;
 		}
+
+		/* Check for the far side aborting the call or a network error
+		 * occurring.  If this happens, save any packet that was under
+		 * construction so that in the case of a network error, the
+		 * call can be retried or redirected.
+		 */
+		if (call->state == RXRPC_CALL_COMPLETE) {
+			ret = call->error;
+			goto out;
+		}
 	} while (msg_data_left(msg) > 0);
 
 success:
@@ -379,11 +386,6 @@ out:
 	_leave(" = %d", ret);
 	return ret;
 
-call_terminated:
-	rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
-	_leave(" = %d", call->error);
-	return call->error;
-
 maybe_error:
 	if (copied)
 		goto success;
-- 
cgit v1.2.3


From fa20e0e32cb3dfc1760b6254b64977f2fb5bd851 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Mon, 28 Aug 2017 21:43:22 +0200
Subject: vxlan: factor out VXLAN-GPE next protocol

The values are shared between VXLAN-GPE and NSH. Originally probably by
coincidence but I notified both working groups about this last year and they
seem to keep the values in sync since then.

Hopefully they'll get a single IANA registry for the values, too. (I asked
them for that.)

Factor out the code to be shared by the NSH implementation.

NSH and MPLS values are added in this patch, too. For MPLS, the drafts
incorrectly assign only a single value, while we have two MPLS ethertypes.
I raised the problem with both groups. For now, I assume the value is for
unicast.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c     | 32 +++++++-------------------------
 include/net/tun_proto.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/net/vxlan.h     |  6 ------
 3 files changed, 56 insertions(+), 31 deletions(-)
 create mode 100644 include/net/tun_proto.h

(limited to 'include/net')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ae3a1da703c2..d7c49cf1d5e9 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -26,6 +26,7 @@
 #include <net/inet_ecn.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/tun_proto.h>
 #include <net/vxlan.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1261,19 +1262,9 @@ static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
 	if (gpe->oam_flag)
 		return false;
 
-	switch (gpe->next_protocol) {
-	case VXLAN_GPE_NP_IPV4:
-		*protocol = htons(ETH_P_IP);
-		break;
-	case VXLAN_GPE_NP_IPV6:
-		*protocol = htons(ETH_P_IPV6);
-		break;
-	case VXLAN_GPE_NP_ETHERNET:
-		*protocol = htons(ETH_P_TEB);
-		break;
-	default:
+	*protocol = tun_p_to_eth_p(gpe->next_protocol);
+	if (!*protocol)
 		return false;
-	}
 
 	unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
 	return true;
@@ -1799,19 +1790,10 @@ static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
 	struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
 
 	gpe->np_applied = 1;
-
-	switch (protocol) {
-	case htons(ETH_P_IP):
-		gpe->next_protocol = VXLAN_GPE_NP_IPV4;
-		return 0;
-	case htons(ETH_P_IPV6):
-		gpe->next_protocol = VXLAN_GPE_NP_IPV6;
-		return 0;
-	case htons(ETH_P_TEB):
-		gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
-		return 0;
-	}
-	return -EPFNOSUPPORT;
+	gpe->next_protocol = tun_p_from_eth_p(protocol);
+	if (!gpe->next_protocol)
+		return -EPFNOSUPPORT;
+	return 0;
 }
 
 static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
diff --git a/include/net/tun_proto.h b/include/net/tun_proto.h
new file mode 100644
index 000000000000..2ea3deba4c99
--- /dev/null
+++ b/include/net/tun_proto.h
@@ -0,0 +1,49 @@
+#ifndef __NET_TUN_PROTO_H
+#define __NET_TUN_PROTO_H
+
+#include <linux/kernel.h>
+
+/* One byte protocol values as defined by VXLAN-GPE and NSH. These will
+ * hopefully get a shared IANA registry.
+ */
+#define TUN_P_IPV4      0x01
+#define TUN_P_IPV6      0x02
+#define TUN_P_ETHERNET  0x03
+#define TUN_P_NSH       0x04
+#define TUN_P_MPLS_UC   0x05
+
+static inline __be16 tun_p_to_eth_p(u8 proto)
+{
+	switch (proto) {
+	case TUN_P_IPV4:
+		return htons(ETH_P_IP);
+	case TUN_P_IPV6:
+		return htons(ETH_P_IPV6);
+	case TUN_P_ETHERNET:
+		return htons(ETH_P_TEB);
+	case TUN_P_NSH:
+		return htons(ETH_P_NSH);
+	case TUN_P_MPLS_UC:
+		return htons(ETH_P_MPLS_UC);
+	}
+	return 0;
+}
+
+static inline u8 tun_p_from_eth_p(__be16 proto)
+{
+	switch (proto) {
+	case htons(ETH_P_IP):
+		return TUN_P_IPV4;
+	case htons(ETH_P_IPV6):
+		return TUN_P_IPV6;
+	case htons(ETH_P_TEB):
+		return TUN_P_ETHERNET;
+	case htons(ETH_P_NSH):
+		return TUN_P_NSH;
+	case htons(ETH_P_MPLS_UC):
+		return TUN_P_MPLS_UC;
+	}
+	return 0;
+}
+
+#endif
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 3f430e38ab82..4e3876dde295 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -168,12 +168,6 @@ reserved_flags2:2;
 #define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
 			     cpu_to_be32(0xff))
 
-/* VXLAN-GPE header Next Protocol. */
-#define VXLAN_GPE_NP_IPV4      0x01
-#define VXLAN_GPE_NP_IPV6      0x02
-#define VXLAN_GPE_NP_ETHERNET  0x03
-#define VXLAN_GPE_NP_NSH       0x04
-
 struct vxlan_metadata {
 	u32		gbp;
 };
-- 
cgit v1.2.3


From 1f0b7744c50573df464ca33d8e5275be509f852b Mon Sep 17 00:00:00 2001
From: Yi Yang <yi.y.yang@intel.com>
Date: Mon, 28 Aug 2017 21:43:23 +0200
Subject: net: add NSH header structures and helpers

NSH (Network Service Header)[1] is a new protocol for service
function chaining, it can be handled as a L3 protocol like
IPv4 and IPv6, Eth + NSH + Inner packet or VxLAN-gpe + NSH +
Inner packet are two typical use cases.

This patch adds NSH header structures and helpers for NSH GSO
support and Open vSwitch NSH support.

[1] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/

[Jiri: added nsh_hdr() helper and renamed the header struct to "struct
nshhdr" to match the usual pattern. Removed packet type defines, these are
now shared with VXLAN-GPE.]

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nsh.h | 307 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 307 insertions(+)
 create mode 100644 include/net/nsh.h

(limited to 'include/net')

diff --git a/include/net/nsh.h b/include/net/nsh.h
new file mode 100644
index 000000000000..a1eaea20be96
--- /dev/null
+++ b/include/net/nsh.h
@@ -0,0 +1,307 @@
+#ifndef __NET_NSH_H
+#define __NET_NSH_H 1
+
+#include <linux/skbuff.h>
+
+/*
+ * Network Service Header:
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Ver|O|U|    TTL    |   Length  |U|U|U|U|MD Type| Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |          Service Path Identifier (SPI)        | Service Index |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                                                               |
+ * ~               Mandatory/Optional Context Headers              ~
+ * |                                                               |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Version: The version field is used to ensure backward compatibility
+ * going forward with future NSH specification updates.  It MUST be set
+ * to 0x0 by the sender, in this first revision of NSH.  Given the
+ * widespread implementation of existing hardware that uses the first
+ * nibble after an MPLS label stack for ECMP decision processing, this
+ * document reserves version 01b and this value MUST NOT be used in
+ * future versions of the protocol.  Please see [RFC7325] for further
+ * discussion of MPLS-related forwarding requirements.
+ *
+ * O bit: Setting this bit indicates an Operations, Administration, and
+ * Maintenance (OAM) packet.  The actual format and processing of SFC
+ * OAM packets is outside the scope of this specification (see for
+ * example [I-D.ietf-sfc-oam-framework] for one approach).
+ *
+ * The O bit MUST be set for OAM packets and MUST NOT be set for non-OAM
+ * packets.  The O bit MUST NOT be modified along the SFP.
+ *
+ * SF/SFF/SFC Proxy/Classifier implementations that do not support SFC
+ * OAM procedures SHOULD discard packets with O bit set, but MAY support
+ * a configurable parameter to enable forwarding received SFC OAM
+ * packets unmodified to the next element in the chain.  Forwarding OAM
+ * packets unmodified by SFC elements that do not support SFC OAM
+ * procedures may be acceptable for a subset of OAM functions, but can
+ * result in unexpected outcomes for others, thus it is recommended to
+ * analyze the impact of forwarding an OAM packet for all OAM functions
+ * prior to enabling this behavior.  The configurable parameter MUST be
+ * disabled by default.
+ *
+ * TTL: Indicates the maximum SFF hops for an SFP.  This field is used
+ * for service plane loop detection.  The initial TTL value SHOULD be
+ * configurable via the control plane; the configured initial value can
+ * be specific to one or more SFPs.  If no initial value is explicitly
+ * provided, the default initial TTL value of 63 MUST be used.  Each SFF
+ * involved in forwarding an NSH packet MUST decrement the TTL value by
+ * 1 prior to NSH forwarding lookup.  Decrementing by 1 from an incoming
+ * value of 0 shall result in a TTL value of 63.  The packet MUST NOT be
+ * forwarded if TTL is, after decrement, 0.
+ *
+ * All other flag fields, marked U, are unassigned and available for
+ * future use, see Section 11.2.1.  Unassigned bits MUST be set to zero
+ * upon origination, and MUST be ignored and preserved unmodified by
+ * other NSH supporting elements.  Elements which do not understand the
+ * meaning of any of these bits MUST NOT modify their actions based on
+ * those unknown bits.
+ *
+ * Length: The total length, in 4-byte words, of NSH including the Base
+ * Header, the Service Path Header, the Fixed Length Context Header or
+ * Variable Length Context Header(s).  The length MUST be 0x6 for MD
+ * Type equal to 0x1, and MUST be 0x2 or greater for MD Type equal to
+ * 0x2.  The length of the NSH header MUST be an integer multiple of 4
+ * bytes, thus variable length metadata is always padded out to a
+ * multiple of 4 bytes.
+ *
+ * MD Type: Indicates the format of NSH beyond the mandatory Base Header
+ * and the Service Path Header.  MD Type defines the format of the
+ * metadata being carried.
+ *
+ * 0x0 - This is a reserved value.  Implementations SHOULD silently
+ * discard packets with MD Type 0x0.
+ *
+ * 0x1 - This indicates that the format of the header includes a fixed
+ * length Context Header (see Figure 4 below).
+ *
+ * 0x2 - This does not mandate any headers beyond the Base Header and
+ * Service Path Header, but may contain optional variable length Context
+ * Header(s).  The semantics of the variable length Context Header(s)
+ * are not defined in this document.  The format of the optional
+ * variable length Context Headers is provided in Section 2.5.1.
+ *
+ * 0xF - This value is reserved for experimentation and testing, as per
+ * [RFC3692].  Implementations not explicitly configured to be part of
+ * an experiment SHOULD silently discard packets with MD Type 0xF.
+ *
+ * Next Protocol: indicates the protocol type of the encapsulated data.
+ * NSH does not alter the inner payload, and the semantics on the inner
+ * protocol remain unchanged due to NSH service function chaining.
+ * Please see the IANA Considerations section below, Section 11.2.5.
+ *
+ * This document defines the following Next Protocol values:
+ *
+ * 0x1: IPv4
+ * 0x2: IPv6
+ * 0x3: Ethernet
+ * 0x4: NSH
+ * 0x5: MPLS
+ * 0xFE: Experiment 1
+ * 0xFF: Experiment 2
+ *
+ * Packets with Next Protocol values not supported SHOULD be silently
+ * dropped by default, although an implementation MAY provide a
+ * configuration parameter to forward them.  Additionally, an
+ * implementation not explicitly configured for a specific experiment
+ * [RFC3692] SHOULD silently drop packets with Next Protocol values 0xFE
+ * and 0xFF.
+ *
+ * Service Path Identifier (SPI): Identifies a service path.
+ * Participating nodes MUST use this identifier for Service Function
+ * Path selection.  The initial classifier MUST set the appropriate SPI
+ * for a given classification result.
+ *
+ * Service Index (SI): Provides location within the SFP.  The initial
+ * classifier for a given SFP SHOULD set the SI to 255, however the
+ * control plane MAY configure the initial value of SI as appropriate
+ * (i.e., taking into account the length of the service function path).
+ * The Service Index MUST be decremented by a value of 1 by Service
+ * Functions or by SFC Proxy nodes after performing required services
+ * and the new decremented SI value MUST be used in the egress packet's
+ * NSH.  The initial Classifier MUST send the packet to the first SFF in
+ * the identified SFP for forwarding along an SFP.  If re-classification
+ * occurs, and that re-classification results in a new SPI, the
+ * (re)classifier is, in effect, the initial classifier for the
+ * resultant SPI.
+ *
+ * The SI is used in conjunction the with Service Path Identifier for
+ * Service Function Path Selection and for determining the next SFF/SF
+ * in the path.  The SI is also valuable when troubleshooting or
+ * reporting service paths.  Additionally, while the TTL field is the
+ * main mechanism for service plane loop detection, the SI can also be
+ * used for detecting service plane loops.
+ *
+ * When the Base Header specifies MD Type = 0x1, a Fixed Length Context
+ * Header (16-bytes) MUST be present immediately following the Service
+ * Path Header. The value of a Fixed Length Context
+ * Header that carries no metadata MUST be set to zero.
+ *
+ * When the base header specifies MD Type = 0x2, zero or more Variable
+ * Length Context Headers MAY be added, immediately following the
+ * Service Path Header (see Figure 5).  Therefore, Length = 0x2,
+ * indicates that only the Base Header followed by the Service Path
+ * Header are present.  The optional Variable Length Context Headers
+ * MUST be of an integer number of 4-bytes.  The base header Length
+ * field MUST be used to determine the offset to locate the original
+ * packet or frame for SFC nodes that require access to that
+ * information.
+ *
+ * The format of the optional variable length Context Headers
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |          Metadata Class       |      Type     |U|    Length   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      Variable Metadata                        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Metadata Class (MD Class): Defines the scope of the 'Type' field to
+ * provide a hierarchical namespace.  The IANA Considerations
+ * Section 11.2.4 defines how the MD Class values can be allocated to
+ * standards bodies, vendors, and others.
+ *
+ * Type: Indicates the explicit type of metadata being carried.  The
+ * definition of the Type is the responsibility of the MD Class owner.
+ *
+ * Unassigned bit: One unassigned bit is available for future use. This
+ * bit MUST NOT be set, and MUST be ignored on receipt.
+ *
+ * Length: Indicates the length of the variable metadata, in bytes.  In
+ * case the metadata length is not an integer number of 4-byte words,
+ * the sender MUST add pad bytes immediately following the last metadata
+ * byte to extend the metadata to an integer number of 4-byte words.
+ * The receiver MUST round up the length field to the nearest 4-byte
+ * word boundary, to locate and process the next field in the packet.
+ * The receiver MUST access only those bytes in the metadata indicated
+ * by the length field (i.e., actual number of bytes) and MUST ignore
+ * the remaining bytes up to the nearest 4-byte word boundary.  The
+ * Length may be 0 or greater.
+ *
+ * A value of 0 denotes a Context Header without a Variable Metadata
+ * field.
+ *
+ * [0] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/
+ */
+
+/**
+ * struct nsh_md1_ctx - Keeps track of NSH context data
+ * @nshc<1-4>: NSH Contexts.
+ */
+struct nsh_md1_ctx {
+	__be32 context[4];
+};
+
+struct nsh_md2_tlv {
+	__be16 md_class;
+	u8 type;
+	u8 length;
+	u8 md_value[];
+};
+
+struct nshhdr {
+	__be16 ver_flags_ttl_len;
+	u8 mdtype;
+	u8 np;
+	__be32 path_hdr;
+	union {
+	    struct nsh_md1_ctx md1;
+	    struct nsh_md2_tlv md2;
+	};
+};
+
+/* Masking NSH header fields. */
+#define NSH_VER_MASK       0xc000
+#define NSH_VER_SHIFT      14
+#define NSH_FLAGS_MASK     0x3000
+#define NSH_FLAGS_SHIFT    12
+#define NSH_TTL_MASK       0x0fc0
+#define NSH_TTL_SHIFT      6
+#define NSH_LEN_MASK       0x003f
+#define NSH_LEN_SHIFT      0
+
+#define NSH_MDTYPE_MASK    0x0f
+#define NSH_MDTYPE_SHIFT   0
+
+#define NSH_SPI_MASK       0xffffff00
+#define NSH_SPI_SHIFT      8
+#define NSH_SI_MASK        0x000000ff
+#define NSH_SI_SHIFT       0
+
+/* MD Type Registry. */
+#define NSH_M_TYPE1     0x01
+#define NSH_M_TYPE2     0x02
+#define NSH_M_EXP1      0xFE
+#define NSH_M_EXP2      0xFF
+
+/* NSH Base Header Length */
+#define NSH_BASE_HDR_LEN  8
+
+/* NSH MD Type 1 header Length. */
+#define NSH_M_TYPE1_LEN   24
+
+/* NSH header maximum Length. */
+#define NSH_HDR_MAX_LEN 256
+
+/* NSH context headers maximum Length. */
+#define NSH_CTX_HDRS_MAX_LEN 248
+
+static inline struct nshhdr *nsh_hdr(struct sk_buff *skb)
+{
+	return (struct nshhdr *)skb_network_header(skb);
+}
+
+static inline u16 nsh_hdr_len(const struct nshhdr *nsh)
+{
+	return ((ntohs(nsh->ver_flags_ttl_len) & NSH_LEN_MASK)
+		>> NSH_LEN_SHIFT) << 2;
+}
+
+static inline u8 nsh_get_ver(const struct nshhdr *nsh)
+{
+	return (ntohs(nsh->ver_flags_ttl_len) & NSH_VER_MASK)
+		>> NSH_VER_SHIFT;
+}
+
+static inline u8 nsh_get_flags(const struct nshhdr *nsh)
+{
+	return (ntohs(nsh->ver_flags_ttl_len) & NSH_FLAGS_MASK)
+		>> NSH_FLAGS_SHIFT;
+}
+
+static inline u8 nsh_get_ttl(const struct nshhdr *nsh)
+{
+	return (ntohs(nsh->ver_flags_ttl_len) & NSH_TTL_MASK)
+		>> NSH_TTL_SHIFT;
+}
+
+static inline void __nsh_set_xflag(struct nshhdr *nsh, u16 xflag, u16 xmask)
+{
+	nsh->ver_flags_ttl_len
+		= (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag);
+}
+
+static inline void nsh_set_flags_and_ttl(struct nshhdr *nsh, u8 flags, u8 ttl)
+{
+	__nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) |
+			     ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK),
+			NSH_FLAGS_MASK | NSH_TTL_MASK);
+}
+
+static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags,
+					 u8 ttl, u8 len)
+{
+	len = len >> 2;
+	__nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) |
+			     ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) |
+			     ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK),
+			NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
+}
+
+#endif /* __NET_NSH_H */
-- 
cgit v1.2.3


From 1b70d792cf6775fb5d0737524387893daeb5374a Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 28 Aug 2017 13:53:34 -0700
Subject: ipv6: Use rt6i_idev index for echo replies to a local address

Tariq repored local pings to linklocal address is failing:
$ ifconfig ens8
ens8: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 11.141.16.6  netmask 255.255.0.0  broadcast 11.141.255.255
        inet6 fe80::7efe:90ff:fecb:7502  prefixlen 64  scopeid 0x20<link>
        ether 7c:fe:90:cb:75:02  txqueuelen 1000  (Ethernet)
        RX packets 12  bytes 1164 (1.1 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 30  bytes 2484 (2.4 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

$  /bin/ping6 -c 3 fe80::7efe:90ff:fecb:7502%ens8
PING fe80::7efe:90ff:fecb:7502%ens8(fe80::7efe:90ff:fecb:7502) 56 data bytes

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 10 ++++++++++
 net/ipv6/icmp.c         | 33 ++++++++++++++++++++-------------
 2 files changed, 30 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 882bc3c7ccde..ee96f402cb75 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -164,6 +164,16 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
 
+static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
+{
+	const struct dst_entry *dst = skb_dst(skb);
+	const struct rt6_info *rt6 = NULL;
+
+	if (dst)
+		rt6 = container_of(dst, struct rt6_info, dst);
+
+	return rt6;
+}
 
 /*
  *	Store a destination cache entry in a socket
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index dd7608cf1d72..5acb54405b10 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -399,6 +399,24 @@ relookup_failed:
 	return ERR_PTR(err);
 }
 
+static int icmp6_iif(const struct sk_buff *skb)
+{
+	int iif = skb->dev->ifindex;
+
+	/* for local traffic to local address, skb dev is the loopback
+	 * device. Check if there is a dst attached to the skb and if so
+	 * get the real device index.
+	 */
+	if (unlikely(iif == LOOPBACK_IFINDEX)) {
+		const struct rt6_info *rt6 = skb_rt6_info(skb);
+
+		if (rt6)
+			iif = rt6->rt6i_idev->dev->ifindex;
+	}
+
+	return iif;
+}
+
 /*
  *	Send an ICMP message in response to a packet in error
  */
@@ -460,18 +478,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	 */
 
 	if (__ipv6_addr_needs_scope_id(addr_type)) {
-		iif = skb->dev->ifindex;
-
-		/* for local packets, get the real device index */
-		if (iif == LOOPBACK_IFINDEX) {
-			dst = skb_dst(skb);
-			if (dst) {
-				struct rt6_info *rt;
-
-				rt = container_of(dst, struct rt6_info, dst);
-				iif = rt->rt6i_idev->dev->ifindex;
-			}
-		}
+		iif = icmp6_iif(skb);
 	} else {
 		dst = skb_dst(skb);
 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
@@ -694,7 +701,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	fl6.daddr = ipv6_hdr(skb)->saddr;
 	if (saddr)
 		fl6.saddr = *saddr;
-	fl6.flowi6_oif = skb->dev->ifindex;
+	fl6.flowi6_oif = icmp6_iif(skb);
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 	fl6.flowi6_mark = mark;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
-- 
cgit v1.2.3


From eaa72dc47488d599439cd0fd0f8c4f1bcb3906bb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 29 Aug 2017 15:16:01 -0700
Subject: neigh: increase queue_len_bytes to match wmem_default

Florian reported UDP xmit drops that could be root caused to the
too small neigh limit.

Current limit is 64 KB, meaning that even a single UDP socket would hit
it, since its default sk_sndbuf comes from net.core.wmem_default
(~212992 bytes on 64bit arches).

Once ARP/ND resolution is in progress, we should allow a little more
packets to be queued, at least for one producer.

Once neigh arp_queue is filled, a rogue socket should hit its sk_sndbuf
limit and either block in sendmsg() or return -EAGAIN.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  7 +++++--
 include/net/sock.h                     | 10 ++++++++++
 net/core/sock.c                        | 10 ----------
 net/decnet/dn_neigh.c                  |  2 +-
 net/ipv4/arp.c                         |  2 +-
 net/ipv4/tcp_input.c                   |  2 +-
 net/ipv6/ndisc.c                       |  2 +-
 7 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'include/net')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 6b0bc0f71534..b3345d0fe0a6 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -109,7 +109,10 @@ neigh/default/unres_qlen_bytes - INTEGER
 	queued for each	unresolved address by other network layers.
 	(added in linux 3.3)
 	Setting negative value is meaningless and will return error.
-	Default: 65536 Bytes(64KB)
+	Default: SK_WMEM_MAX, (same as net.core.wmem_default).
+		Exact value depends on architecture and kernel options,
+		but should be enough to allow queuing 256 packets
+		of medium size.
 
 neigh/default/unres_qlen - INTEGER
 	The maximum number of packets which may be queued for each
@@ -119,7 +122,7 @@ neigh/default/unres_qlen - INTEGER
 	unexpected packet loss. The current default value is calculated
 	according to default value of unres_qlen_bytes and true size of
 	packet.
-	Default: 31
+	Default: 101
 
 mtu_expires - INTEGER
 	Time, in seconds, that cached PMTU information is kept.
diff --git a/include/net/sock.h b/include/net/sock.h
index 1c2912d433e8..03a362568357 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2368,6 +2368,16 @@ bool sk_net_capable(const struct sock *sk, int cap);
 
 void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
 
+/* Take into consideration the size of the struct sk_buff overhead in the
+ * determination of these values, since that is non-constant across
+ * platforms.  This makes socket queueing behavior and performance
+ * not depend upon such differences.
+ */
+#define _SK_MEM_PACKETS		256
+#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
+#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index dfdd14cac775..9b7b6bbb2a23 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,16 +307,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX];
 static struct lock_class_key af_elock_keys[AF_MAX];
 static struct lock_class_key af_kern_callback_keys[AF_MAX];
 
-/* Take into consideration the size of the struct sk_buff overhead in the
- * determination of these values, since that is non-constant across
- * platforms.  This makes socket queueing behavior and performance
- * not depend upon such differences.
- */
-#define _SK_MEM_PACKETS		256
-#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
-#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-
 /* Run time adjustable parameters. */
 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
 EXPORT_SYMBOL(sysctl_wmem_max);
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 21dedf6fd0f7..22bf0b95d6ed 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -94,7 +94,7 @@ struct neigh_table dn_neigh_table = {
 			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
 			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
 			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
-			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64*1024,
+			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
 			[NEIGH_VAR_PROXY_QLEN] = 0,
 			[NEIGH_VAR_ANYCAST_DELAY] = 0,
 			[NEIGH_VAR_PROXY_DELAY] = 0,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8b52179ddc6e..7c45b8896709 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -171,7 +171,7 @@ struct neigh_table arp_tbl = {
 			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
 			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
 			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
-			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
 			[NEIGH_VAR_PROXY_QLEN] = 64,
 			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
 			[NEIGH_VAR_PROXY_DELAY]	= (8 * HZ) / 10,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 568ccfd6dd37..7616cd76f6f6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6086,9 +6086,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
 	struct sock *fastopen_sk = NULL;
-	struct dst_entry *dst = NULL;
 	struct request_sock *req;
 	bool want_cookie = false;
+	struct dst_entry *dst;
 	struct flowi fl;
 
 	/* TW buckets are converted to open requests without
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 5e338eb89509..266a530414d7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -127,7 +127,7 @@ struct neigh_table nd_tbl = {
 			[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
 			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
 			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
-			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
 			[NEIGH_VAR_PROXY_QLEN] = 64,
 			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
 			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
-- 
cgit v1.2.3


From c1d2b4c3e204e602c97680335d082b8d012d08cd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 30 Aug 2017 19:24:57 +0200
Subject: tcp: Revert "tcp: remove CA_ACK_SLOWPATH"

This change was a followup to the header prediction removal,
so first revert this as a prerequisite to back out hp removal.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h       |  5 +++--
 net/ipv4/tcp_input.c    | 35 +++++++++++++++++++----------------
 net/ipv4/tcp_westwood.c | 31 +++++++++++++++++++++++++++----
 3 files changed, 49 insertions(+), 22 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c614ff135b66..c546d13ffbca 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -910,8 +910,9 @@ enum tcp_ca_event {
 
 /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
 enum tcp_ca_ack_event_flags {
-	CA_ACK_WIN_UPDATE	= (1 << 0),	/* ACK updated window */
-	CA_ACK_ECE		= (1 << 1),	/* ECE bit is set on ack */
+	CA_ACK_SLOWPATH		= (1 << 0),	/* In slow path processing */
+	CA_ACK_WIN_UPDATE	= (1 << 1),	/* ACK updated window */
+	CA_ACK_ECE		= (1 << 2),	/* ECE bit is set on ack */
 };
 
 /*
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7616cd76f6f6..a0e436366d31 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3552,7 +3552,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 lost = tp->lost;
 	int acked = 0; /* Number of packets newly acked */
 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
-	u32 ack_ev_flags = 0;
 
 	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
@@ -3593,26 +3592,30 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (flag & FLAG_UPDATE_TS_RECENT)
 		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
-	if (ack_seq != TCP_SKB_CB(skb)->end_seq)
-		flag |= FLAG_DATA;
-	else
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
+	{
+		u32 ack_ev_flags = CA_ACK_SLOWPATH;
 
-	flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
+		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
+			flag |= FLAG_DATA;
+		else
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
 
-	if (TCP_SKB_CB(skb)->sacked)
-		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_state);
+		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
-	if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
-		flag |= FLAG_ECE;
-		ack_ev_flags = CA_ACK_ECE;
-	}
+		if (TCP_SKB_CB(skb)->sacked)
+			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+							&sack_state);
+
+		if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
+			flag |= FLAG_ECE;
+			ack_ev_flags |= CA_ACK_ECE;
+		}
 
-	if (flag & FLAG_WIN_UPDATE)
-		ack_ev_flags |= CA_ACK_WIN_UPDATE;
+		if (flag & FLAG_WIN_UPDATE)
+			ack_ev_flags |= CA_ACK_WIN_UPDATE;
 
-	tcp_in_ack_event(sk, ack_ev_flags);
+		tcp_in_ack_event(sk, ack_ev_flags);
+	}
 
 	/* We passed data and got it acked, remove any soft error
 	 * log. Something worked...
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index e5de84310949..bec9cafbe3f9 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -153,6 +153,24 @@ static inline void update_rtt_min(struct westwood *w)
 		w->rtt_min = min(w->rtt, w->rtt_min);
 }
 
+/*
+ * @westwood_fast_bw
+ * It is called when we are in fast path. In particular it is called when
+ * header prediction is successful. In such case in fact update is
+ * straight forward and doesn't need any particular care.
+ */
+static inline void westwood_fast_bw(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct westwood *w = inet_csk_ca(sk);
+
+	westwood_update_window(sk);
+
+	w->bk += tp->snd_una - w->snd_una;
+	w->snd_una = tp->snd_una;
+	update_rtt_min(w);
+}
+
 /*
  * @westwood_acked_count
  * This function evaluates cumul_ack for evaluating bk in case of
@@ -205,12 +223,17 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk)
 
 static void tcp_westwood_ack(struct sock *sk, u32 ack_flags)
 {
-	struct westwood *w = inet_csk_ca(sk);
+	if (ack_flags & CA_ACK_SLOWPATH) {
+		struct westwood *w = inet_csk_ca(sk);
 
-	westwood_update_window(sk);
-	w->bk += westwood_acked_count(sk);
+		westwood_update_window(sk);
+		w->bk += westwood_acked_count(sk);
 
-	update_rtt_min(w);
+		update_rtt_min(w);
+		return;
+	}
+
+	westwood_fast_bw(sk);
 }
 
 static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
-- 
cgit v1.2.3


From 31770e34e43d6c8dee129bfee77e56c34e61f0e5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 30 Aug 2017 19:24:58 +0200
Subject: tcp: Revert "tcp: remove header prediction"

This reverts commit 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78.

Eric Dumazet says:
  We found at Google a significant regression caused by
  45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 tcp: remove header prediction

  In typical RPC  (TCP_RR), when a TCP socket receives data, we now call
  tcp_ack() while we used to not call it.

  This touches enough cache lines to cause a slowdown.

so problem does not seem to be HP removal itself but the tcp_ack()
call.  Therefore, it might be possible to remove HP after all, provided
one finds a way to elide tcp_ack for most cases.

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h       |   6 ++
 include/net/tcp.h         |  23 ++++++
 include/uapi/linux/snmp.h |   2 +
 net/ipv4/proc.c           |   2 +
 net/ipv4/tcp.c            |   4 +-
 net/ipv4/tcp_input.c      | 188 ++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv4/tcp_minisocks.c  |   2 +
 net/ipv4/tcp_output.c     |   2 +
 8 files changed, 223 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 267164a1d559..4aa40ef02d32 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -147,6 +147,12 @@ struct tcp_sock {
 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
 	u16	gso_segs;	/* Max number of segs per GSO packet	*/
 
+/*
+ *	Header prediction flags
+ *	0x5?10 << 16 + snd_wnd in net byte order
+ */
+	__be32	pred_flags;
+
 /*
  *	RFC793 variables by their proper names. This means you can
  *	read the code and the spec side by side (and laugh ...)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c546d13ffbca..9c3db054e47f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -634,6 +634,29 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
 	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
 }
 
+static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
+{
+	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
+			       ntohl(TCP_FLAG_ACK) |
+			       snd_wnd);
+}
+
+static inline void tcp_fast_path_on(struct tcp_sock *tp)
+{
+	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
+}
+
+static inline void tcp_fast_path_check(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
+	    tp->rcv_wnd &&
+	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
+	    !tp->urg_data)
+		tcp_fast_path_on(tp);
+}
+
 /* Compute the actual rto_min value */
 static inline u32 tcp_rto_min(struct sock *sk)
 {
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index b3f346fb9fe3..758f12b58541 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -184,7 +184,9 @@ enum
 	LINUX_MIB_DELAYEDACKLOST,		/* DelayedACKLost */
 	LINUX_MIB_LISTENOVERFLOWS,		/* ListenOverflows */
 	LINUX_MIB_LISTENDROPS,			/* ListenDrops */
+	LINUX_MIB_TCPHPHITS,			/* TCPHPHits */
 	LINUX_MIB_TCPPUREACKS,			/* TCPPureAcks */
+	LINUX_MIB_TCPHPACKS,			/* TCPHPAcks */
 	LINUX_MIB_TCPRENORECOVERY,		/* TCPRenoRecovery */
 	LINUX_MIB_TCPSACKRECOVERY,		/* TCPSackRecovery */
 	LINUX_MIB_TCPSACKRENEGING,		/* TCPSACKReneging */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b6d3fe03feb3..127153f1ed8a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -206,7 +206,9 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST),
 	SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS),
 	SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS),
+	SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS),
 	SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS),
+	SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS),
 	SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
 	SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
 	SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 566083ee2654..21ca2df274c5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1963,8 +1963,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 		tcp_rcv_space_adjust(sk);
 
 skip_copy:
-		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
+		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
 			tp->urg_data = 0;
+			tcp_fast_path_check(sk);
+		}
 		if (used + offset < skb->len)
 			continue;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a0e436366d31..c5d7656beeee 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -103,6 +103,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
 #define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
+#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
@@ -3371,6 +3372,12 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 		if (tp->snd_wnd != nwin) {
 			tp->snd_wnd = nwin;
 
+			/* Note, it is the only place, where
+			 * fast path is recovered for sending TCP.
+			 */
+			tp->pred_flags = 0;
+			tcp_fast_path_check(sk);
+
 			if (tcp_send_head(sk))
 				tcp_slow_start_after_idle_check(sk);
 
@@ -3592,7 +3599,19 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (flag & FLAG_UPDATE_TS_RECENT)
 		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
-	{
+	if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+		/* Window is constant, pure forward advance.
+		 * No more checks are required.
+		 * Note, we use the fact that SND.UNA>=SND.WL2.
+		 */
+		tcp_update_wl(tp, ack_seq);
+		tcp_snd_una_update(tp, ack);
+		flag |= FLAG_WIN_UPDATE;
+
+		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
+
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
+	} else {
 		u32 ack_ev_flags = CA_ACK_SLOWPATH;
 
 		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
@@ -4407,6 +4426,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	if (TCP_SKB_CB(skb)->has_rxtstamp)
 		TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
 
+	/* Disable header prediction. */
+	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
@@ -4647,6 +4668,8 @@ queue_and_out:
 		if (tp->rx_opt.num_sacks)
 			tcp_sack_remove(tp);
 
+		tcp_fast_path_check(sk);
+
 		if (eaten > 0)
 			kfree_skb_partial(skb, fragstolen);
 		if (!sock_flag(sk, SOCK_DEAD))
@@ -4972,6 +4995,7 @@ static int tcp_prune_queue(struct sock *sk)
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
 
 	/* Massive buffer overcommit. */
+	tp->pred_flags = 0;
 	return -1;
 }
 
@@ -5143,6 +5167,9 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 
 	tp->urg_data = TCP_URG_NOTYET;
 	tp->urg_seq = ptr;
+
+	/* Disable header prediction. */
+	tp->pred_flags = 0;
 }
 
 /* This is the 'fast' part of urgent handling. */
@@ -5301,6 +5328,26 @@ discard:
 
 /*
  *	TCP receive function for the ESTABLISHED state.
+ *
+ *	It is split into a fast path and a slow path. The fast path is
+ * 	disabled when:
+ *	- A zero window was announced from us - zero window probing
+ *        is only handled properly in the slow path.
+ *	- Out of order segments arrived.
+ *	- Urgent data is expected.
+ *	- There is no buffer space left
+ *	- Unexpected TCP flags/window values/header lengths are received
+ *	  (detected by checking the TCP header against pred_flags)
+ *	- Data is sent in both directions. Fast path only supports pure senders
+ *	  or pure receivers (this means either the sequence number or the ack
+ *	  value must stay constant)
+ *	- Unexpected TCP option.
+ *
+ *	When these conditions are not satisfied it drops into a standard
+ *	receive procedure patterned after RFC793 to handle all cases.
+ *	The first three cases are guaranteed by proper pred_flags setting,
+ *	the rest is checked inline. Fast processing is turned on in
+ *	tcp_data_queue when everything is OK.
  */
 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			 const struct tcphdr *th)
@@ -5311,19 +5358,144 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	tcp_mstamp_refresh(tp);
 	if (unlikely(!sk->sk_rx_dst))
 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
+	/*
+	 *	Header prediction.
+	 *	The code loosely follows the one in the famous
+	 *	"30 instruction TCP receive" Van Jacobson mail.
+	 *
+	 *	Van's trick is to deposit buffers into socket queue
+	 *	on a device interrupt, to call tcp_recv function
+	 *	on the receive process context and checksum and copy
+	 *	the buffer to user space. smart...
+	 *
+	 *	Our current scheme is not silly either but we take the
+	 *	extra cost of the net_bh soft interrupt processing...
+	 *	We do checksum and copy also but from device to kernel.
+	 */
 
 	tp->rx_opt.saw_tstamp = 0;
 
+	/*	pred_flags is 0xS?10 << 16 + snd_wnd
+	 *	if header_prediction is to be made
+	 *	'S' will always be tp->tcp_header_len >> 2
+	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
+	 *  turn it off	(when there are holes in the receive
+	 *	 space for instance)
+	 *	PSH flag is ignored.
+	 */
+
+	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
+	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
+	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
+		int tcp_header_len = tp->tcp_header_len;
+
+		/* Timestamp header prediction: tcp_header_len
+		 * is automatically equal to th->doff*4 due to pred_flags
+		 * match.
+		 */
+
+		/* Check timestamp */
+		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
+			/* No? Slow path! */
+			if (!tcp_parse_aligned_timestamp(tp, th))
+				goto slow_path;
+
+			/* If PAWS failed, check it more carefully in slow path */
+			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
+				goto slow_path;
+
+			/* DO NOT update ts_recent here, if checksum fails
+			 * and timestamp was corrupted part, it will result
+			 * in a hung connection since we will drop all
+			 * future packets due to the PAWS test.
+			 */
+		}
+
+		if (len <= tcp_header_len) {
+			/* Bulk data transfer: sender */
+			if (len == tcp_header_len) {
+				/* Predicted packet is in window by definition.
+				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+				 * Hence, check seq<=rcv_wup reduces to:
+				 */
+				if (tcp_header_len ==
+				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
+				    tp->rcv_nxt == tp->rcv_wup)
+					tcp_store_ts_recent(tp);
+
+				/* We know that such packets are checksummed
+				 * on entry.
+				 */
+				tcp_ack(sk, skb, 0);
+				__kfree_skb(skb);
+				tcp_data_snd_check(sk);
+				return;
+			} else { /* Header too small */
+				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+				goto discard;
+			}
+		} else {
+			int eaten = 0;
+			bool fragstolen = false;
+
+			if (tcp_checksum_complete(skb))
+				goto csum_error;
+
+			if ((int)skb->truesize > sk->sk_forward_alloc)
+				goto step5;
+
+			/* Predicted packet is in window by definition.
+			 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+			 * Hence, check seq<=rcv_wup reduces to:
+			 */
+			if (tcp_header_len ==
+			    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
+			    tp->rcv_nxt == tp->rcv_wup)
+				tcp_store_ts_recent(tp);
+
+			tcp_rcv_rtt_measure_ts(sk, skb);
+
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
+
+			/* Bulk data transfer: receiver */
+			eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
+					      &fragstolen);
+
+			tcp_event_data_recv(sk, skb);
+
+			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
+				/* Well, only one small jumplet in fast path... */
+				tcp_ack(sk, skb, FLAG_DATA);
+				tcp_data_snd_check(sk);
+				if (!inet_csk_ack_scheduled(sk))
+					goto no_ack;
+			}
+
+			__tcp_ack_snd_check(sk, 0);
+no_ack:
+			if (eaten)
+				kfree_skb_partial(skb, fragstolen);
+			sk->sk_data_ready(sk);
+			return;
+		}
+	}
+
+slow_path:
 	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
 		goto csum_error;
 
 	if (!th->ack && !th->rst && !th->syn)
 		goto discard;
 
+	/*
+	 *	Standard slow path.
+	 */
+
 	if (!tcp_validate_incoming(sk, skb, th, 1))
 		return;
 
-	if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0)
+step5:
+	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
 		goto discard;
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
@@ -5376,6 +5548,11 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 
 	if (sock_flag(sk, SOCK_KEEPOPEN))
 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
+
+	if (!tp->rx_opt.snd_wscale)
+		__tcp_fast_path_on(tp, tp->snd_wnd);
+	else
+		tp->pred_flags = 0;
 }
 
 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@ -5504,7 +5681,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_ecn_rcv_synack(tp, th);
 
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-		tcp_ack(sk, skb, 0);
+		tcp_ack(sk, skb, FLAG_SLOWPATH);
 
 		/* Ok.. it's good. Set up sequence numbers and
 		 * move to established.
@@ -5740,8 +5917,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		return 0;
 
 	/* step 5: check the ACK field */
-
-	acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT |
+	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
+				      FLAG_UPDATE_TS_RECENT |
 				      FLAG_NO_CHALLENGE_ACK) > 0;
 
 	if (!acceptable) {
@@ -5809,6 +5986,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		tp->lsndtime = tcp_jiffies32;
 
 		tcp_initialize_rcv_mss(sk);
+		tcp_fast_path_on(tp);
 		break;
 
 	case TCP_FIN_WAIT1: {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1537b87c657f..188a6f31356d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -436,6 +436,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		struct tcp_sock *newtp = tcp_sk(newsk);
 
 		/* Now setup tcp_sock */
+		newtp->pred_flags = 0;
+
 		newtp->rcv_wup = newtp->copied_seq =
 		newtp->rcv_nxt = treq->rcv_isn + 1;
 		newtp->segs_in = 1;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3e0d19631534..5b6690d05abb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -295,7 +295,9 @@ static u16 tcp_select_window(struct sock *sk)
 	/* RFC1323 scaling applied */
 	new_win >>= tp->rx_opt.rcv_wscale;
 
+	/* If we advertise zero window, disable fast path. */
 	if (new_win == 0) {
+		tp->pred_flags = 0;
 		if (old_win)
 			NET_INC_STATS(sock_net(sk),
 				      LINUX_MIB_TCPTOZEROWINDOWADV);
-- 
cgit v1.2.3


From 65a206c01e8e7ffe971477a36419422099216eff Mon Sep 17 00:00:00 2001
From: Chris Mi <chrism@mellanox.com>
Date: Wed, 30 Aug 2017 02:31:59 -0400
Subject: net/sched: Change act_api and act_xxx modules to use IDR

Typically, each TC filter has its own action. All the actions of the
same type are saved in its hash table. But the hash buckets are too
small that it degrades to a list. And the performance is greatly
affected. For example, it takes about 0m11.914s to insert 64K rules.
If we convert the hash table to IDR, it only takes about 0m1.500s.
The improvement is huge.

But please note that the test result is based on previous patch that
cls_flower uses IDR.

Signed-off-by: Chris Mi <chrism@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      |  76 +++++---------
 net/sched/act_api.c        | 251 ++++++++++++++++++++++-----------------------
 net/sched/act_bpf.c        |  17 ++-
 net/sched/act_connmark.c   |  16 ++-
 net/sched/act_csum.c       |  16 ++-
 net/sched/act_gact.c       |  16 ++-
 net/sched/act_ife.c        |  18 ++--
 net/sched/act_ipt.c        |  26 +++--
 net/sched/act_mirred.c     |  19 ++--
 net/sched/act_nat.c        |  16 ++-
 net/sched/act_pedit.c      |  18 ++--
 net/sched/act_police.c     |  18 ++--
 net/sched/act_sample.c     |  17 ++-
 net/sched/act_simple.c     |  20 ++--
 net/sched/act_skbedit.c    |  18 ++--
 net/sched/act_skbmod.c     |  18 ++--
 net/sched/act_tunnel_key.c |  20 ++--
 net/sched/act_vlan.c       |  22 ++--
 18 files changed, 278 insertions(+), 344 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 26ffd8333f50..8f3d5d8b5ae0 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -10,12 +10,9 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
-
-struct tcf_hashinfo {
-	struct hlist_head	*htab;
-	unsigned int		hmask;
-	spinlock_t		lock;
-	u32			index;
+struct tcf_idrinfo {
+	spinlock_t	lock;
+	struct idr	action_idr;
 };
 
 struct tc_action_ops;
@@ -25,9 +22,8 @@ struct tc_action {
 	__u32				type; /* for backward compat(TCA_OLD_COMPAT) */
 	__u32				order;
 	struct list_head		list;
-	struct tcf_hashinfo		*hinfo;
+	struct tcf_idrinfo		*idrinfo;
 
-	struct hlist_node		tcfa_head;
 	u32				tcfa_index;
 	int				tcfa_refcnt;
 	int				tcfa_bindcnt;
@@ -44,7 +40,6 @@ struct tc_action {
 	struct tc_cookie	*act_cookie;
 	struct tcf_chain	*goto_chain;
 };
-#define tcf_head	common.tcfa_head
 #define tcf_index	common.tcfa_index
 #define tcf_refcnt	common.tcfa_refcnt
 #define tcf_bindcnt	common.tcfa_bindcnt
@@ -57,27 +52,6 @@ struct tc_action {
 #define tcf_lock	common.tcfa_lock
 #define tcf_rcu		common.tcfa_rcu
 
-static inline unsigned int tcf_hash(u32 index, unsigned int hmask)
-{
-	return index & hmask;
-}
-
-static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask)
-{
-	int i;
-
-	spin_lock_init(&hf->lock);
-	hf->index = 0;
-	hf->hmask = mask;
-	hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head),
-			   GFP_KERNEL);
-	if (!hf->htab)
-		return -ENOMEM;
-	for (i = 0; i < mask + 1; i++)
-		INIT_HLIST_HEAD(&hf->htab[i]);
-	return 0;
-}
-
 /* Update lastuse only if needed, to avoid dirtying a cache line.
  * We use a temp variable to avoid fetching jiffies twice.
  */
@@ -126,53 +100,51 @@ struct tc_action_ops {
 };
 
 struct tc_action_net {
-	struct tcf_hashinfo *hinfo;
+	struct tcf_idrinfo *idrinfo;
 	const struct tc_action_ops *ops;
 };
 
 static inline
 int tc_action_net_init(struct tc_action_net *tn,
-		       const struct tc_action_ops *ops, unsigned int mask)
+		       const struct tc_action_ops *ops)
 {
 	int err = 0;
 
-	tn->hinfo = kmalloc(sizeof(*tn->hinfo), GFP_KERNEL);
-	if (!tn->hinfo)
+	tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL);
+	if (!tn->idrinfo)
 		return -ENOMEM;
 	tn->ops = ops;
-	err = tcf_hashinfo_init(tn->hinfo, mask);
-	if (err)
-		kfree(tn->hinfo);
+	spin_lock_init(&tn->idrinfo->lock);
+	idr_init(&tn->idrinfo->action_idr);
 	return err;
 }
 
-void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
-			  struct tcf_hashinfo *hinfo);
+void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
+			 struct tcf_idrinfo *idrinfo);
 
 static inline void tc_action_net_exit(struct tc_action_net *tn)
 {
-	tcf_hashinfo_destroy(tn->ops, tn->hinfo);
-	kfree(tn->hinfo);
+	tcf_idrinfo_destroy(tn->ops, tn->idrinfo);
+	kfree(tn->idrinfo);
 }
 
 int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 		       struct netlink_callback *cb, int type,
 		       const struct tc_action_ops *ops);
-int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
-u32 tcf_hash_new_index(struct tc_action_net *tn);
-bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
+int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
+bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 		    int bind);
-int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
-		    struct tc_action **a, const struct tc_action_ops *ops, int bind,
-		    bool cpustats);
-void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est);
-void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a);
+int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
+		   struct tc_action **a, const struct tc_action_ops *ops,
+		   int bind, bool cpustats);
+void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est);
+void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
-int __tcf_hash_release(struct tc_action *a, bool bind, bool strict);
+int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
 
-static inline int tcf_hash_release(struct tc_action *a, bool bind)
+static inline int tcf_idr_release(struct tc_action *a, bool bind)
 {
-	return __tcf_hash_release(a, bind, false);
+	return __tcf_idr_release(a, bind, false);
 }
 
 int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 02fcb0c78a28..0eb545bcb247 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -70,11 +70,11 @@ static void free_tcf(struct rcu_head *head)
 	kfree(p);
 }
 
-static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
+static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
 {
-	spin_lock_bh(&hinfo->lock);
-	hlist_del(&p->tcfa_head);
-	spin_unlock_bh(&hinfo->lock);
+	spin_lock_bh(&idrinfo->lock);
+	idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
+	spin_unlock_bh(&idrinfo->lock);
 	gen_kill_estimator(&p->tcfa_rate_est);
 	/*
 	 * gen_estimator est_timer() might access p->tcfa_lock
@@ -83,7 +83,7 @@ static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
 	call_rcu(&p->tcfa_rcu, free_tcf);
 }
 
-int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
+int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 {
 	int ret = 0;
 
@@ -97,64 +97,60 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
 		if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
 			if (p->ops->cleanup)
 				p->ops->cleanup(p, bind);
-			tcf_hash_destroy(p->hinfo, p);
+			tcf_idr_remove(p->idrinfo, p);
 			ret = ACT_P_DELETED;
 		}
 	}
 
 	return ret;
 }
-EXPORT_SYMBOL(__tcf_hash_release);
+EXPORT_SYMBOL(__tcf_idr_release);
 
-static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
-	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	int err = 0, index = -1, s_i = 0, n_i = 0;
 	u32 act_flags = cb->args[2];
 	unsigned long jiffy_since = cb->args[3];
 	struct nlattr *nest;
+	struct idr *idr = &idrinfo->action_idr;
+	struct tc_action *p;
+	unsigned long id = 1;
 
-	spin_lock_bh(&hinfo->lock);
+	spin_lock_bh(&idrinfo->lock);
 
 	s_i = cb->args[0];
 
-	for (i = 0; i < (hinfo->hmask + 1); i++) {
-		struct hlist_head *head;
-		struct tc_action *p;
-
-		head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
-
-		hlist_for_each_entry_rcu(p, head, tcfa_head) {
-			index++;
-			if (index < s_i)
-				continue;
-
-			if (jiffy_since &&
-			    time_after(jiffy_since,
-				       (unsigned long)p->tcfa_tm.lastuse))
-				continue;
-
-			nest = nla_nest_start(skb, n_i);
-			if (nest == NULL)
-				goto nla_put_failure;
-			err = tcf_action_dump_1(skb, p, 0, 0);
-			if (err < 0) {
-				index--;
-				nlmsg_trim(skb, nest);
-				goto done;
-			}
-			nla_nest_end(skb, nest);
-			n_i++;
-			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
-			    n_i >= TCA_ACT_MAX_PRIO)
-				goto done;
+	idr_for_each_entry_ext(idr, p, id) {
+		index++;
+		if (index < s_i)
+			continue;
+
+		if (jiffy_since &&
+		    time_after(jiffy_since,
+			       (unsigned long)p->tcfa_tm.lastuse))
+			continue;
+
+		nest = nla_nest_start(skb, n_i);
+		if (!nest)
+			goto nla_put_failure;
+		err = tcf_action_dump_1(skb, p, 0, 0);
+		if (err < 0) {
+			index--;
+			nlmsg_trim(skb, nest);
+			goto done;
 		}
+		nla_nest_end(skb, nest);
+		n_i++;
+		if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+		    n_i >= TCA_ACT_MAX_PRIO)
+			goto done;
 	}
 done:
 	if (index >= 0)
 		cb->args[0] = index + 1;
 
-	spin_unlock_bh(&hinfo->lock);
+	spin_unlock_bh(&idrinfo->lock);
 	if (n_i) {
 		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
 			cb->args[1] = n_i;
@@ -166,31 +162,29 @@ nla_put_failure:
 	goto done;
 }
 
-static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			  const struct tc_action_ops *ops)
 {
 	struct nlattr *nest;
-	int i = 0, n_i = 0;
+	int n_i = 0;
 	int ret = -EINVAL;
+	struct idr *idr = &idrinfo->action_idr;
+	struct tc_action *p;
+	unsigned long id = 1;
 
 	nest = nla_nest_start(skb, 0);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (nla_put_string(skb, TCA_KIND, ops->kind))
 		goto nla_put_failure;
-	for (i = 0; i < (hinfo->hmask + 1); i++) {
-		struct hlist_head *head;
-		struct hlist_node *n;
-		struct tc_action *p;
-
-		head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
-		hlist_for_each_entry_safe(p, n, head, tcfa_head) {
-			ret = __tcf_hash_release(p, false, true);
-			if (ret == ACT_P_DELETED) {
-				module_put(p->ops->owner);
-				n_i++;
-			} else if (ret < 0)
-				goto nla_put_failure;
+
+	idr_for_each_entry_ext(idr, p, id) {
+		ret = __tcf_idr_release(p, false, true);
+		if (ret == ACT_P_DELETED) {
+			module_put(p->ops->owner);
+			n_i++;
+		} else if (ret < 0) {
+			goto nla_put_failure;
 		}
 	}
 	if (nla_put_u32(skb, TCA_FCNT, n_i))
@@ -207,12 +201,12 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 		       struct netlink_callback *cb, int type,
 		       const struct tc_action_ops *ops)
 {
-	struct tcf_hashinfo *hinfo = tn->hinfo;
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
 	if (type == RTM_DELACTION) {
-		return tcf_del_walker(hinfo, skb, ops);
+		return tcf_del_walker(idrinfo, skb, ops);
 	} else if (type == RTM_GETACTION) {
-		return tcf_dump_walker(hinfo, skb, cb);
+		return tcf_dump_walker(idrinfo, skb, cb);
 	} else {
 		WARN(1, "tcf_generic_walker: unknown action %d\n", type);
 		return -EINVAL;
@@ -220,40 +214,21 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(tcf_generic_walker);
 
-static struct tc_action *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
+static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
 {
 	struct tc_action *p = NULL;
-	struct hlist_head *head;
 
-	spin_lock_bh(&hinfo->lock);
-	head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
-	hlist_for_each_entry_rcu(p, head, tcfa_head)
-		if (p->tcfa_index == index)
-			break;
-	spin_unlock_bh(&hinfo->lock);
+	spin_lock_bh(&idrinfo->lock);
+	p = idr_find_ext(&idrinfo->action_idr, index);
+	spin_unlock_bh(&idrinfo->lock);
 
 	return p;
 }
 
-u32 tcf_hash_new_index(struct tc_action_net *tn)
-{
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	u32 val = hinfo->index;
-
-	do {
-		if (++val == 0)
-			val = 1;
-	} while (tcf_hash_lookup(val, hinfo));
-
-	hinfo->index = val;
-	return val;
-}
-EXPORT_SYMBOL(tcf_hash_new_index);
-
-int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
+int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 {
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	struct tc_action *p = tcf_hash_lookup(index, hinfo);
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
 
 	if (p) {
 		*a = p;
@@ -261,15 +236,15 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(tcf_hash_search);
+EXPORT_SYMBOL(tcf_idr_search);
 
-bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
-		    int bind)
+bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
+		   int bind)
 {
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	struct tc_action *p = NULL;
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
 
-	if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
+	if (index && p) {
 		if (bind)
 			p->tcfa_bindcnt++;
 		p->tcfa_refcnt++;
@@ -278,23 +253,25 @@ bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 	}
 	return false;
 }
-EXPORT_SYMBOL(tcf_hash_check);
+EXPORT_SYMBOL(tcf_idr_check);
 
-void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
+void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
 {
 	if (est)
 		gen_kill_estimator(&a->tcfa_rate_est);
 	call_rcu(&a->tcfa_rcu, free_tcf);
 }
-EXPORT_SYMBOL(tcf_hash_cleanup);
+EXPORT_SYMBOL(tcf_idr_cleanup);
 
-int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
-		    struct tc_action **a, const struct tc_action_ops *ops,
-		    int bind, bool cpustats)
+int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
+		   struct tc_action **a, const struct tc_action_ops *ops,
+		   int bind, bool cpustats)
 {
 	struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
-	struct tcf_hashinfo *hinfo = tn->hinfo;
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct idr *idr = &idrinfo->action_idr;
 	int err = -ENOMEM;
+	unsigned long idr_index;
 
 	if (unlikely(!p))
 		return -ENOMEM;
@@ -317,8 +294,28 @@ err2:
 		}
 	}
 	spin_lock_init(&p->tcfa_lock);
-	INIT_HLIST_NODE(&p->tcfa_head);
-	p->tcfa_index = index ? index : tcf_hash_new_index(tn);
+	/* user doesn't specify an index */
+	if (!index) {
+		spin_lock_bh(&idrinfo->lock);
+		err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0,
+				    GFP_KERNEL);
+		spin_unlock_bh(&idrinfo->lock);
+		if (err) {
+err3:
+			free_percpu(p->cpu_qstats);
+			goto err2;
+		}
+		p->tcfa_index = idr_index;
+	} else {
+		spin_lock_bh(&idrinfo->lock);
+		err = idr_alloc_ext(idr, NULL, NULL, index, index + 1,
+				    GFP_KERNEL);
+		spin_unlock_bh(&idrinfo->lock);
+		if (err)
+			goto err3;
+		p->tcfa_index = index;
+	}
+
 	p->tcfa_tm.install = jiffies;
 	p->tcfa_tm.lastuse = jiffies;
 	p->tcfa_tm.firstuse = 0;
@@ -327,52 +324,46 @@ err2:
 					&p->tcfa_rate_est,
 					&p->tcfa_lock, NULL, est);
 		if (err) {
-			free_percpu(p->cpu_qstats);
-			goto err2;
+			goto err3;
 		}
 	}
 
-	p->hinfo = hinfo;
+	p->idrinfo = idrinfo;
 	p->ops = ops;
 	INIT_LIST_HEAD(&p->list);
 	*a = p;
 	return 0;
 }
-EXPORT_SYMBOL(tcf_hash_create);
+EXPORT_SYMBOL(tcf_idr_create);
 
-void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a)
+void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
 {
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	unsigned int h = tcf_hash(a->tcfa_index, hinfo->hmask);
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
-	spin_lock_bh(&hinfo->lock);
-	hlist_add_head(&a->tcfa_head, &hinfo->htab[h]);
-	spin_unlock_bh(&hinfo->lock);
+	spin_lock_bh(&idrinfo->lock);
+	idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index);
+	spin_unlock_bh(&idrinfo->lock);
 }
-EXPORT_SYMBOL(tcf_hash_insert);
+EXPORT_SYMBOL(tcf_idr_insert);
 
-void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
-			  struct tcf_hashinfo *hinfo)
+void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
+			 struct tcf_idrinfo *idrinfo)
 {
-	int i;
-
-	for (i = 0; i < hinfo->hmask + 1; i++) {
-		struct tc_action *p;
-		struct hlist_node *n;
-
-		hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfa_head) {
-			int ret;
+	struct idr *idr = &idrinfo->action_idr;
+	struct tc_action *p;
+	int ret;
+	unsigned long id = 1;
 
-			ret = __tcf_hash_release(p, false, true);
-			if (ret == ACT_P_DELETED)
-				module_put(ops->owner);
-			else if (ret < 0)
-				return;
-		}
+	idr_for_each_entry_ext(idr, p, id) {
+		ret = __tcf_idr_release(p, false, true);
+		if (ret == ACT_P_DELETED)
+			module_put(ops->owner);
+		else if (ret < 0)
+			return;
 	}
-	kfree(hinfo->htab);
+	idr_destroy(&idrinfo->action_idr);
 }
-EXPORT_SYMBOL(tcf_hashinfo_destroy);
+EXPORT_SYMBOL(tcf_idrinfo_destroy);
 
 static LIST_HEAD(act_base);
 static DEFINE_RWLOCK(act_mod_lock);
@@ -524,7 +515,7 @@ int tcf_action_destroy(struct list_head *actions, int bind)
 	int ret = 0;
 
 	list_for_each_entry_safe(a, tmp, actions, list) {
-		ret = __tcf_hash_release(a, bind, true);
+		ret = __tcf_idr_release(a, bind, true);
 		if (ret == ACT_P_DELETED)
 			module_put(a->ops->owner);
 		else if (ret < 0)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 9afe1337cfd1..c0c707eb2c96 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -21,7 +21,6 @@
 #include <linux/tc_act/tc_bpf.h>
 #include <net/tc_act/tc_bpf.h>
 
-#define BPF_TAB_MASK		15
 #define ACT_BPF_NAME_LEN	256
 
 struct tcf_bpf_cfg {
@@ -295,9 +294,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
 
-	if (!tcf_hash_check(tn, parm->index, act, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, act,
-				      &act_bpf_ops, bind, true);
+	if (!tcf_idr_check(tn, parm->index, act, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, act,
+				     &act_bpf_ops, bind, true);
 		if (ret < 0)
 			return ret;
 
@@ -307,7 +306,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 		if (bind)
 			return 0;
 
-		tcf_hash_release(*act, bind);
+		tcf_idr_release(*act, bind);
 		if (!replace)
 			return -EEXIST;
 	}
@@ -343,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 	rcu_assign_pointer(prog->filter, cfg.filter);
 
 	if (res == ACT_P_CREATED) {
-		tcf_hash_insert(tn, *act);
+		tcf_idr_insert(tn, *act);
 	} else {
 		/* make sure the program being replaced is no longer executing */
 		synchronize_rcu();
@@ -353,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 	return res;
 out:
 	if (res == ACT_P_CREATED)
-		tcf_hash_cleanup(*act, est);
+		tcf_idr_cleanup(*act, est);
 
 	return ret;
 }
@@ -379,7 +378,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_bpf_ops __read_mostly = {
@@ -399,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-	return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK);
+	return tc_action_net_init(tn, &act_bpf_ops);
 }
 
 static void __net_exit bpf_exit_net(struct net *net)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2155bc6c6a1e..10b7a8855a6c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -28,8 +28,6 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 
-#define CONNMARK_TAB_MASK     3
-
 static unsigned int connmark_net_id;
 static struct tc_action_ops act_connmark_ops;
 
@@ -119,9 +117,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_connmark_ops, bind, false);
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_connmark_ops, bind, false);
 		if (ret)
 			return ret;
 
@@ -130,13 +128,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 		ci->net = net;
 		ci->zone = parm->zone;
 
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 		ret = ACT_P_CREATED;
 	} else {
 		ci = to_connmark(*a);
 		if (bind)
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 		/* replacing action and zone */
@@ -189,7 +187,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_connmark_ops = {
@@ -208,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-	return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK);
+	return tc_action_net_init(tn, &act_connmark_ops);
 }
 
 static void __net_exit connmark_exit_net(struct net *net)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 67afc12df88b..1c40caadcff9 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,8 +37,6 @@
 #include <linux/tc_act/tc_csum.h>
 #include <net/tc_act/tc_csum.h>
 
-#define CSUM_TAB_MASK 15
-
 static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
 	[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
 };
@@ -67,16 +65,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_CSUM_PARMS]);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_csum_ops, bind, false);
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_csum_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -88,7 +86,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 	spin_unlock_bh(&p->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 
 	return ret;
 }
@@ -609,7 +607,7 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_csum_ops = {
@@ -628,7 +626,7 @@ static __net_init int csum_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-	return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK);
+	return tc_action_net_init(tn, &act_csum_ops);
 }
 
 static void __net_exit csum_exit_net(struct net *net)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 99afe8b1f1fb..e29a48ef7fc3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -23,8 +23,6 @@
 #include <linux/tc_act/tc_gact.h>
 #include <net/tc_act/tc_gact.h>
 
-#define GACT_TAB_MASK	15
-
 static unsigned int gact_net_id;
 static struct tc_action_ops act_gact_ops;
 
@@ -92,16 +90,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	}
 #endif
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_gact_ops, bind, true);
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_gact_ops, bind, true);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -122,7 +120,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	}
 #endif
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -214,7 +212,7 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_gact_ops = {
@@ -234,7 +232,7 @@ static __net_init int gact_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-	return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK);
+	return tc_action_net_init(tn, &act_gact_ops);
 }
 
 static void __net_exit gact_exit_net(struct net *net)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 7ed1be80ee86..8ccd35825b6b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -34,8 +34,6 @@
 #include <linux/etherdevice.h>
 #include <net/ife.h>
 
-#define IFE_TAB_MASK 15
-
 static unsigned int ife_net_id;
 static int max_metacnt = IFE_META_MAX + 1;
 static struct tc_action_ops act_ife_ops;
@@ -452,18 +450,18 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_IFE_PARMS]);
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a, &act_ife_ops,
-				      bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
+				     bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -507,7 +505,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 		if (err) {
 metadata_parse_err:
 			if (exists)
-				tcf_hash_release(*a, bind);
+				tcf_idr_release(*a, bind);
 			if (ret == ACT_P_CREATED)
 				_tcf_ife_cleanup(*a, bind);
 
@@ -541,7 +539,7 @@ metadata_parse_err:
 		spin_unlock_bh(&ife->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 
 	return ret;
 }
@@ -800,7 +798,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_ife_ops = {
@@ -820,7 +818,7 @@ static __net_init int ife_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-	return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK);
+	return tc_action_net_init(tn, &act_ife_ops);
 }
 
 static void __net_exit ife_exit_net(struct net *net)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 541707802a23..d9e399a7e3d5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -28,8 +28,6 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 
 
-#define IPT_TAB_MASK     15
-
 static unsigned int ipt_net_id;
 static struct tc_action_ops act_ipt_ops;
 
@@ -118,33 +116,33 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (tb[TCA_IPT_INDEX] != NULL)
 		index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
-	exists = tcf_hash_check(tn, index, a, bind);
+	exists = tcf_idr_check(tn, index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 
 	td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
 	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, index, est, a, ops, bind,
-				      false);
+		ret = tcf_idr_create(tn, index, est, a, ops, bind,
+				     false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 
 		if (!ovr)
 			return -EEXIST;
@@ -180,7 +178,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	ipt->tcfi_hook  = hook;
 	spin_unlock_bh(&ipt->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 
 err3:
@@ -189,7 +187,7 @@ err2:
 	kfree(tname);
 err1:
 	if (ret == ACT_P_CREATED)
-		tcf_hash_cleanup(*a, est);
+		tcf_idr_cleanup(*a, est);
 	return err;
 }
 
@@ -316,7 +314,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_ipt_ops = {
@@ -336,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-	return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK);
+	return tc_action_net_init(tn, &act_ipt_ops);
 }
 
 static void __net_exit ipt_exit_net(struct net *net)
@@ -366,7 +364,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_xt_ops = {
@@ -386,7 +384,7 @@ static __net_init int xt_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-	return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK);
+	return tc_action_net_init(tn, &act_xt_ops);
 }
 
 static void __net_exit xt_exit_net(struct net *net)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1b5549ababd4..416627c66f08 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -28,7 +28,6 @@
 #include <linux/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_mirred.h>
 
-#define MIRRED_TAB_MASK     7
 static LIST_HEAD(mirred_list);
 static DEFINE_SPINLOCK(mirred_list_lock);
 
@@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
@@ -106,14 +105,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		break;
 	default:
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 	if (parm->ifindex) {
 		dev = __dev_get_by_index(net, parm->ifindex);
 		if (dev == NULL) {
 			if (exists)
-				tcf_hash_release(*a, bind);
+				tcf_idr_release(*a, bind);
 			return -ENODEV;
 		}
 		mac_header_xmit = dev_is_mac_header_xmit(dev);
@@ -124,13 +123,13 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		if (dev == NULL)
 			return -EINVAL;
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_mirred_ops, bind, true);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_mirred_ops, bind, true);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -152,7 +151,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		spin_lock_bh(&mirred_list_lock);
 		list_add(&m->tcfm_list, &mirred_list);
 		spin_unlock_bh(&mirred_list_lock);
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	}
 
 	return ret;
@@ -283,7 +282,7 @@ static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static int mirred_device_event(struct notifier_block *unused,
@@ -344,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-	return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK);
+	return tc_action_net_init(tn, &act_mirred_ops);
 }
 
 static void __net_exit mirred_exit_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 9016ab8a0649..c365d01b99c8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -29,8 +29,6 @@
 #include <net/udp.h>
 
 
-#define NAT_TAB_MASK	15
-
 static unsigned int nat_net_id;
 static struct tc_action_ops act_nat_ops;
 
@@ -58,16 +56,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_NAT_PARMS]);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_nat_ops, bind, false);
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_nat_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -83,7 +81,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 	spin_unlock_bh(&p->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 
 	return ret;
 }
@@ -290,7 +288,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_nat_ops = {
@@ -309,7 +307,7 @@ static __net_init int nat_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-	return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK);
+	return tc_action_net_init(tn, &act_nat_ops);
 }
 
 static void __net_exit nat_exit_net(struct net *net)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 7dc5892671c8..491fe5deb09e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -24,8 +24,6 @@
 #include <net/tc_act/tc_pedit.h>
 #include <uapi/linux/tc_act/tc_pedit.h>
 
-#define PEDIT_TAB_MASK	15
-
 static unsigned int pedit_net_id;
 static struct tc_action_ops act_pedit_ops;
 
@@ -168,17 +166,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	if (IS_ERR(keys_ex))
 		return PTR_ERR(keys_ex);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
 		if (!parm->nkeys)
 			return -EINVAL;
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_pedit_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_pedit_ops, bind, false);
 		if (ret)
 			return ret;
 		p = to_pedit(*a);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (keys == NULL) {
-			tcf_hash_cleanup(*a, est);
+			tcf_idr_cleanup(*a, est);
 			kfree(keys_ex);
 			return -ENOMEM;
 		}
@@ -186,7 +184,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	} else {
 		if (bind)
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 		p = to_pedit(*a);
@@ -214,7 +212,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 
 	spin_unlock_bh(&p->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -432,7 +430,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_pedit_ops = {
@@ -452,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-	return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK);
+	return tc_action_net_init(tn, &act_pedit_ops);
 }
 
 static void __net_exit pedit_exit_net(struct net *net)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index b062bc80c7cb..3bb2ebf9e9ae 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -40,8 +40,6 @@ struct tcf_police {
 
 #define to_police(pc) ((struct tcf_police *)pc)
 
-#define POL_TAB_MASK     15
-
 /* old policer structure from before tc actions */
 struct tc_police_compat {
 	u32			index;
@@ -101,18 +99,18 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_POLICE_TBF]);
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, NULL, a,
-				      &act_police_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, NULL, a,
+				     &act_police_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -188,7 +186,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		return ret;
 
 	police->tcfp_t_c = ktime_get_ns();
-	tcf_hash_insert(tn, *a);
+	tcf_idr_insert(tn, *a);
 
 	return ret;
 
@@ -196,7 +194,7 @@ failure:
 	qdisc_put_rtab(P_tab);
 	qdisc_put_rtab(R_tab);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_cleanup(*a, est);
+		tcf_idr_cleanup(*a, est);
 	return err;
 }
 
@@ -310,7 +308,7 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 MODULE_AUTHOR("Alexey Kuznetsov");
@@ -333,7 +331,7 @@ static __net_init int police_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
-	return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK);
+	return tc_action_net_init(tn, &act_police_ops);
 }
 
 static void __net_exit police_exit_net(struct net *net)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 59d6645a4007..ec986ae52808 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -25,7 +25,6 @@
 
 #include <linux/if_arp.h>
 
-#define SAMPLE_TAB_MASK     7
 static unsigned int sample_net_id;
 static struct tc_action_ops act_sample_ops;
 
@@ -59,18 +58,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SAMPLE_PARMS]);
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_sample_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_sample_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -82,7 +81,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	psample_group = psample_group_get(net, s->psample_group_num);
 	if (!psample_group) {
 		if (ret == ACT_P_CREATED)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 	RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -93,7 +92,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -221,7 +220,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_sample_ops = {
@@ -241,7 +240,7 @@ static __net_init int sample_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-	return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK);
+	return tc_action_net_init(tn, &act_sample_ops);
 }
 
 static void __net_exit sample_exit_net(struct net *net)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 43605e7ce051..e7b57e5071a3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -24,8 +24,6 @@
 #include <linux/tc_act/tc_defact.h>
 #include <net/tc_act/tc_defact.h>
 
-#define SIMP_TAB_MASK     7
-
 static unsigned int simp_net_id;
 static struct tc_action_ops act_simp_ops;
 
@@ -102,28 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_DEF_PARMS]);
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_DEF_DATA] == NULL) {
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 
 	defdata = nla_data(tb[TCA_DEF_DATA]);
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_simp_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_simp_ops, bind, false);
 		if (ret)
 			return ret;
 
 		d = to_defact(*a);
 		ret = alloc_defdata(d, defdata);
 		if (ret < 0) {
-			tcf_hash_cleanup(*a, est);
+			tcf_idr_cleanup(*a, est);
 			return ret;
 		}
 		d->tcf_action = parm->action;
@@ -131,7 +129,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	} else {
 		d = to_defact(*a);
 
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 
@@ -139,7 +137,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -183,7 +181,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_simp_ops = {
@@ -203,7 +201,7 @@ static __net_init int simp_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-	return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK);
+	return tc_action_net_init(tn, &act_simp_ops);
 }
 
 static void __net_exit simp_exit_net(struct net *net)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6b3e65d7de0c..59949d61f20d 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -27,8 +27,6 @@
 #include <linux/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_skbedit.h>
 
-#define SKBEDIT_TAB_MASK     15
-
 static unsigned int skbedit_net_id;
 static struct tc_action_ops act_skbedit_ops;
 
@@ -118,18 +116,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (!flags) {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_skbedit_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_skbedit_ops, bind, false);
 		if (ret)
 			return ret;
 
@@ -137,7 +135,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		ret = ACT_P_CREATED;
 	} else {
 		d = to_skbedit(*a);
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -163,7 +161,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	spin_unlock_bh(&d->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -221,7 +219,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_skbedit_ops = {
@@ -240,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-	return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK);
+	return tc_action_net_init(tn, &act_skbedit_ops);
 }
 
 static void __net_exit skbedit_exit_net(struct net *net)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index a73c4bbcada2..b642ad3d39dd 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -20,8 +20,6 @@
 #include <linux/tc_act/tc_skbmod.h>
 #include <net/tc_act/tc_skbmod.h>
 
-#define SKBMOD_TAB_MASK     15
-
 static unsigned int skbmod_net_id;
 static struct tc_action_ops act_skbmod_ops;
 
@@ -129,7 +127,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	if (parm->flags & SKBMOD_F_SWAPMAC)
 		lflags = SKBMOD_F_SWAPMAC;
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
@@ -137,14 +135,14 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_skbmod_ops, bind, true);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_skbmod_ops, bind, true);
 		if (ret)
 			return ret;
 
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -155,7 +153,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
 	if (unlikely(!p)) {
 		if (ovr)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
@@ -182,7 +180,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 		kfree_rcu(p_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -245,7 +243,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_skbmod_ops = {
@@ -265,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-	return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+	return tc_action_net_init(tn, &act_skbmod_ops);
 }
 
 static void __net_exit skbmod_exit_net(struct net *net)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index fd7e75679c69..30c96274c638 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -20,8 +20,6 @@
 #include <linux/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_tunnel_key.h>
 
-#define TUNNEL_KEY_TAB_MASK     15
-
 static unsigned int tunnel_key_net_id;
 static struct tc_action_ops act_tunnel_key_ops;
 
@@ -100,7 +98,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
@@ -159,14 +157,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_tunnel_key_ops, bind, true);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_tunnel_key_ops, bind, true);
 		if (ret)
 			return ret;
 
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -177,7 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
 		if (ret == ACT_P_CREATED)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
@@ -193,13 +191,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		kfree_rcu(params_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 
 	return ret;
 
 err_out:
 	if (exists)
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 	return ret;
 }
 
@@ -304,7 +302,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_tunnel_key_ops = {
@@ -324,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-	return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK);
+	return tc_action_net_init(tn, &act_tunnel_key_ops);
 }
 
 static void __net_exit tunnel_key_exit_net(struct net *net)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 13ba3a89f675..16eb067a8d8f 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -19,8 +19,6 @@
 #include <linux/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_vlan.h>
 
-#define VLAN_TAB_MASK     15
-
 static unsigned int vlan_net_id;
 static struct tc_action_ops act_vlan_ops;
 
@@ -128,7 +126,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!tb[TCA_VLAN_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_VLAN_PARMS]);
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
@@ -139,13 +137,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	case TCA_VLAN_ACT_MODIFY:
 		if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
 			if (exists)
-				tcf_hash_release(*a, bind);
+				tcf_idr_release(*a, bind);
 			return -EINVAL;
 		}
 		push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
 		if (push_vid >= VLAN_VID_MASK) {
 			if (exists)
-				tcf_hash_release(*a, bind);
+				tcf_idr_release(*a, bind);
 			return -ERANGE;
 		}
 
@@ -167,20 +165,20 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 		break;
 	default:
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 	action = parm->v_action;
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_vlan_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_vlan_ops, bind, false);
 		if (ret)
 			return ret;
 
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -199,7 +197,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	spin_unlock_bh(&v->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -252,7 +250,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_vlan_ops = {
@@ -271,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-	return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK);
+	return tc_action_net_init(tn, &act_vlan_ops);
 }
 
 static void __net_exit vlan_exit_net(struct net *net)
-- 
cgit v1.2.3


From 47ebcc0bb1d5eb7f1b1eeab675409ea7f67b4a5c Mon Sep 17 00:00:00 2001
From: Yossi Kuperman <yossiku@mellanox.com>
Date: Wed, 30 Aug 2017 11:30:39 +0300
Subject: xfrm: Add support for network devices capable of removing the ESP
 trailer

In conjunction with crypto offload [1], removing the ESP trailer by
hardware can potentially improve the performance by avoiding (1) a
cache miss incurred by reading the nexthdr field and (2) the necessity
to calculate the csum value of the trailer in order to keep skb->csum
valid.

This patch introduces the changes to the xfrm stack and merely serves
as an infrastructure. Subsequent patch to mlx5 driver will put this to
a good use.

[1] https://www.mail-archive.com/netdev@vger.kernel.org/msg175733.html

Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h    |  1 +
 net/ipv4/esp4.c       | 70 ++++++++++++++++++++++++++++++++++-----------------
 net/ipv6/esp6.c       | 51 ++++++++++++++++++++++++++-----------
 net/xfrm/xfrm_input.c |  5 ++++
 4 files changed, 89 insertions(+), 38 deletions(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9c7b70cce6d6..f002a2c5e33c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1019,6 +1019,7 @@ struct xfrm_offload {
 #define	CRYPTO_FALLBACK		8
 #define	XFRM_GSO_SEGMENT	16
 #define	XFRM_GRO		32
+#define	XFRM_ESP_NO_TRAILER	64
 
 	__u32			status;
 #define CRYPTO_SUCCESS				1
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 741acd7b9646..319000573bc7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -499,19 +499,59 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	return esp_output_tail(x, skb, &esp);
 }
 
+static inline int esp_remove_trailer(struct sk_buff *skb)
+{
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct crypto_aead *aead = x->data;
+	int alen, hlen, elen;
+	int padlen, trimlen;
+	__wsum csumdiff;
+	u8 nexthdr[2];
+	int ret;
+
+	alen = crypto_aead_authsize(aead);
+	hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+	elen = skb->len - hlen;
+
+	if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
+		ret = xo->proto;
+		goto out;
+	}
+
+	if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
+		BUG();
+
+	ret = -EINVAL;
+	padlen = nexthdr[0];
+	if (padlen + 2 + alen >= elen) {
+		net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
+				    padlen + 2, elen - alen);
+		goto out;
+	}
+
+	trimlen = alen + padlen + 2;
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
+		skb->csum = csum_block_sub(skb->csum, csumdiff,
+					   skb->len - trimlen);
+	}
+	pskb_trim(skb, skb->len - trimlen);
+
+	ret = nexthdr[1];
+
+out:
+	return ret;
+}
+
 int esp_input_done2(struct sk_buff *skb, int err)
 {
 	const struct iphdr *iph;
 	struct xfrm_state *x = xfrm_input_state(skb);
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct crypto_aead *aead = x->data;
-	int alen = crypto_aead_authsize(aead);
 	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
-	int elen = skb->len - hlen;
 	int ihl;
-	u8 nexthdr[2];
-	int padlen, trimlen;
-	__wsum csumdiff;
 
 	if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
 		kfree(ESP_SKB_CB(skb)->tmp);
@@ -519,16 +559,10 @@ int esp_input_done2(struct sk_buff *skb, int err)
 	if (unlikely(err))
 		goto out;
 
-	if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
-		BUG();
-
-	err = -EINVAL;
-	padlen = nexthdr[0];
-	if (padlen + 2 + alen >= elen)
+	err = esp_remove_trailer(skb);
+	if (unlikely(err < 0))
 		goto out;
 
-	/* ... check padding bits here. Silly. :-) */
-
 	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 
@@ -569,22 +603,12 @@ int esp_input_done2(struct sk_buff *skb, int err)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	trimlen = alen + padlen + 2;
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
-		skb->csum = csum_block_sub(skb->csum, csumdiff,
-					   skb->len - trimlen);
-	}
-	pskb_trim(skb, skb->len - trimlen);
-
 	skb_pull_rcsum(skb, hlen);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		skb_reset_transport_header(skb);
 	else
 		skb_set_transport_header(skb, -ihl);
 
-	err = nexthdr[1];
-
 	/* RFC4303: Drop dummy packets without any error */
 	if (err == IPPROTO_NONE)
 		err = -EINVAL;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 74bde202eb9a..7fb41b0ad437 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -461,29 +461,30 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	return esp6_output_tail(x, skb, &esp);
 }
 
-int esp6_input_done2(struct sk_buff *skb, int err)
+static inline int esp_remove_trailer(struct sk_buff *skb)
 {
 	struct xfrm_state *x = xfrm_input_state(skb);
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct crypto_aead *aead = x->data;
-	int alen = crypto_aead_authsize(aead);
-	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
-	int elen = skb->len - hlen;
-	int hdr_len = skb_network_header_len(skb);
+	int alen, hlen, elen;
 	int padlen, trimlen;
 	__wsum csumdiff;
 	u8 nexthdr[2];
+	int ret;
 
-	if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
-		kfree(ESP_SKB_CB(skb)->tmp);
+	alen = crypto_aead_authsize(aead);
+	hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+	elen = skb->len - hlen;
 
-	if (unlikely(err))
+	if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
+		ret = xo->proto;
 		goto out;
+	}
 
 	if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
 		BUG();
 
-	err = -EINVAL;
+	ret = -EINVAL;
 	padlen = nexthdr[0];
 	if (padlen + 2 + alen >= elen) {
 		net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
@@ -491,26 +492,46 @@ int esp6_input_done2(struct sk_buff *skb, int err)
 		goto out;
 	}
 
-	/* ... check padding bits here. Silly. :-) */
-
 	trimlen = alen + padlen + 2;
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		skb_postpull_rcsum(skb, skb_network_header(skb),
-				   skb_network_header_len(skb));
 		csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
 		skb->csum = csum_block_sub(skb->csum, csumdiff,
 					   skb->len - trimlen);
 	}
 	pskb_trim(skb, skb->len - trimlen);
 
+	ret = nexthdr[1];
+
+out:
+	return ret;
+}
+
+int esp6_input_done2(struct sk_buff *skb, int err)
+{
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct crypto_aead *aead = x->data;
+	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+	int hdr_len = skb_network_header_len(skb);
+
+	if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+		kfree(ESP_SKB_CB(skb)->tmp);
+
+	if (unlikely(err))
+		goto out;
+
+	err = esp_remove_trailer(skb);
+	if (unlikely(err < 0))
+		goto out;
+
+	skb_postpull_rcsum(skb, skb_network_header(skb),
+			   skb_network_header_len(skb));
 	skb_pull_rcsum(skb, hlen);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		skb_reset_transport_header(skb);
 	else
 		skb_set_transport_header(skb, -hdr_len);
 
-	err = nexthdr[1];
-
 	/* RFC4303: Drop dummy packets without any error */
 	if (err == IPPROTO_NONE)
 		err = -EINVAL;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index f07eec59dcae..2515cd2bc5db 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -247,6 +247,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 					goto drop;
 				}
 
+				if (xo->status & CRYPTO_INVALID_PROTOCOL) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+					goto drop;
+				}
+
 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 				goto drop;
 			}
-- 
cgit v1.2.3


From 07d79fc7d94e3f884b8b1c95aa615b202bb5e4c1 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 30 Aug 2017 14:30:36 -0700
Subject: net_sched: add reverse binding for tc class

TC filters when used as classifiers are bound to TC classes.
However, there is a hidden difference when adding them in different
orders:

1. If we add tc classes before its filters, everything is fine.
   Logically, the classes exist before we specify their ID's in
   filters, it is easy to bind them together, just as in the current
   code base.

2. If we add tc filters before the tc classes they bind, we have to
   do dynamic lookup in fast path. What's worse, this happens all
   the time not just once, because on fast path tcf_result is passed
   on stack, there is no way to propagate back to the one in tc filters.

This hidden difference hurts performance silently if we have many tc
classes in hierarchy.

This patch intends to close this gap by doing the reverse binding when
we create a new class, in this case we can actually search all the
filters in its parent, match and fixup by classid. And because
tcf_result is specific to each type of tc filter, we have to introduce
a new ops for each filter to tell how to bind the class.

Note, we still can NOT totally get rid of those class lookup in
->enqueue() because cgroup and flow filters have no way to determine
the classid at setup time, they still have to go through dynamic lookup.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  1 +
 net/sched/cls_basic.c     |  9 +++++++
 net/sched/cls_bpf.c       |  9 +++++++
 net/sched/cls_flower.c    |  9 +++++++
 net/sched/cls_fw.c        |  9 +++++++
 net/sched/cls_matchall.c  |  9 +++++++
 net/sched/cls_route.c     |  9 +++++++
 net/sched/cls_rsvp.h      |  9 +++++++
 net/sched/cls_tcindex.c   |  9 +++++++
 net/sched/cls_u32.c       |  9 +++++++
 net/sched/sch_api.c       | 68 +++++++++++++++++++++++++++++++++++++++++++++--
 11 files changed, 148 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c30b634c5f82..d6247a3c40df 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -217,6 +217,7 @@ struct tcf_proto_ops {
 					void **, bool);
 	int			(*delete)(struct tcf_proto*, void *, bool*);
 	void			(*walk)(struct tcf_proto*, struct tcf_walker *arg);
+	void			(*bind_class)(void *, u32, unsigned long);
 
 	/* rtnetlink specific */
 	int			(*dump)(struct net*, struct tcf_proto*, void *,
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 73cc7f167a38..d89ebafd2239 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -235,6 +235,14 @@ skip:
 	}
 }
 
+static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct basic_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
 static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		      struct sk_buff *skb, struct tcmsg *t)
 {
@@ -280,6 +288,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = {
 	.delete		=	basic_delete,
 	.walk		=	basic_walk,
 	.dump		=	basic_dump,
+	.bind_class	=	basic_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6f2dffe30f25..520c5027646a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -607,6 +607,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct cls_bpf_prog *prog = fh;
+
+	if (prog && prog->res.classid == classid)
+		prog->res.class = cl;
+}
+
 static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -635,6 +643,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
 	.delete		=	cls_bpf_delete,
 	.walk		=	cls_bpf_walk,
 	.dump		=	cls_bpf_dump,
+	.bind_class	=	cls_bpf_bind_class,
 };
 
 static int __init cls_bpf_init_mod(void)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 3d041d279b92..1a267e77c6de 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1351,6 +1351,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct cls_fl_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.kind		= "flower",
 	.classify	= fl_classify,
@@ -1361,6 +1369,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.delete		= fl_delete,
 	.walk		= fl_walk,
 	.dump		= fl_dump,
+	.bind_class	= fl_bind_class,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 192255ec50bd..941245ad07fd 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -412,6 +412,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void fw_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct fw_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_fw_ops __read_mostly = {
 	.kind		=	"fw",
 	.classify	=	fw_classify,
@@ -422,6 +430,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = {
 	.delete		=	fw_delete,
 	.walk		=	fw_walk,
 	.dump		=	fw_dump,
+	.bind_class	=	fw_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index d4dc387f7a56..21cc45caf842 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -251,6 +251,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void mall_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct cls_mall_head *head = fh;
+
+	if (head && head->res.classid == classid)
+		head->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_mall_ops __read_mostly = {
 	.kind		= "matchall",
 	.classify	= mall_classify,
@@ -261,6 +269,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
 	.delete		= mall_delete,
 	.walk		= mall_walk,
 	.dump		= mall_dump,
+	.bind_class	= mall_bind_class,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 3b70982394ce..9ddde65915d2 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -624,6 +624,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void route4_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct route4_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_route4_ops __read_mostly = {
 	.kind		=	"route",
 	.classify	=	route4_classify,
@@ -634,6 +642,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = {
 	.delete		=	route4_delete,
 	.walk		=	route4_walk,
 	.dump		=	route4_dump,
+	.bind_class	=	route4_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 26203ff817f3..98c05db85bcb 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -723,6 +723,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct rsvp_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
 static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.kind		=	RSVP_ID,
 	.classify	=	rsvp_classify,
@@ -733,6 +741,7 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.delete		=	rsvp_delete,
 	.walk		=	rsvp_walk,
 	.dump		=	rsvp_dump,
+	.bind_class	=	rsvp_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index fb281b9b2c52..14a7e08b2fa9 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -606,6 +606,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct tcindex_filter_result *r = fh;
+
+	if (r && r->res.classid == classid)
+		r->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
 	.kind		=	"tcindex",
 	.classify	=	tcindex_classify,
@@ -616,6 +624,7 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
 	.delete		=	tcindex_delete,
 	.walk		=	tcindex_walk,
 	.dump		=	tcindex_dump,
+	.bind_class	=	tcindex_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 99ea4c74dd5b..10b8d851fc6b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1112,6 +1112,14 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
+static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct tc_u_knode *n = fh;
+
+	if (n && n->res.classid == classid)
+		n->res.class = cl;
+}
+
 static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		    struct sk_buff *skb, struct tcmsg *t)
 {
@@ -1242,6 +1250,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
 	.delete		=	u32_delete,
 	.walk		=	u32_walk,
 	.dump		=	u32_dump,
+	.bind_class	=	u32_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e7f8e4bfd4ec..929b024f41ba 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -35,6 +35,7 @@
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 
 /*
 
@@ -1648,6 +1649,64 @@ static int tclass_del_notify(struct net *net,
 			      n->nlmsg_flags & NLM_F_ECHO);
 }
 
+#ifdef CONFIG_NET_CLS
+
+struct tcf_bind_args {
+	struct tcf_walker w;
+	u32 classid;
+	unsigned long cl;
+};
+
+static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
+{
+	struct tcf_bind_args *a = (void *)arg;
+
+	if (tp->ops->bind_class) {
+		tcf_tree_lock(tp);
+		tp->ops->bind_class(n, a->classid, a->cl);
+		tcf_tree_unlock(tp);
+	}
+	return 0;
+}
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+			   unsigned long new_cl)
+{
+	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+	struct tcf_block *block;
+	struct tcf_chain *chain;
+	unsigned long cl;
+
+	cl = cops->find(q, portid);
+	if (!cl)
+		return;
+	block = cops->tcf_block(q, cl);
+	if (!block)
+		return;
+	list_for_each_entry(chain, &block->chain_list, list) {
+		struct tcf_proto *tp;
+
+		for (tp = rtnl_dereference(chain->filter_chain);
+		     tp; tp = rtnl_dereference(tp->next)) {
+			struct tcf_bind_args arg = {};
+
+			arg.w.fn = tcf_node_bind;
+			arg.classid = clid;
+			arg.cl = new_cl;
+			tp->ops->walk(tp, &arg.w);
+		}
+	}
+}
+
+#else
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+			   unsigned long new_cl)
+{
+}
+
+#endif
+
 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
 {
@@ -1753,6 +1812,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 			break;
 		case RTM_DELTCLASS:
 			err = tclass_del_notify(net, cops, skb, n, q, cl);
+			/* Unbind the class with flilters with 0 */
+			tc_bind_tclass(q, portid, clid, 0);
 			goto out;
 		case RTM_GETTCLASS:
 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
@@ -1767,9 +1828,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 	err = -EOPNOTSUPP;
 	if (cops->change)
 		err = cops->change(q, clid, portid, tca, &new_cl);
-	if (err == 0)
+	if (err == 0) {
 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
-
+		/* We just create a new class, need to do reverse binding. */
+		if (cl != new_cl)
+			tc_bind_tclass(q, portid, clid, new_cl);
+	}
 out:
 	return err;
 }
-- 
cgit v1.2.3


From 1797f5b3cf0b3a73c42b89f7a8fd897417373730 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 31 Aug 2017 17:59:12 +0200
Subject: devlink: Add IPv6 header for dpipe

This will be used by the IPv6 host table which will be introduced in the
following patches. The fields in the header are added per-use. This header
is global and can be reused by many drivers.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  1 +
 include/uapi/linux/devlink.h |  5 +++++
 net/core/devlink.c           | 17 +++++++++++++++++
 3 files changed, 23 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index aaf7178127a2..b9654e133599 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -330,6 +330,7 @@ int devlink_dpipe_match_put(struct sk_buff *skb,
 			    struct devlink_dpipe_match *match);
 extern struct devlink_dpipe_header devlink_dpipe_header_ethernet;
 extern struct devlink_dpipe_header devlink_dpipe_header_ipv4;
+extern struct devlink_dpipe_header devlink_dpipe_header_ipv6;
 
 #else
 
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 6c172548589d..0cbca96c66b9 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -234,9 +234,14 @@ enum devlink_dpipe_field_ipv4_id {
 	DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
 };
 
+enum devlink_dpipe_field_ipv6_id {
+	DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+};
+
 enum devlink_dpipe_header_id {
 	DEVLINK_DPIPE_HEADER_ETHERNET,
 	DEVLINK_DPIPE_HEADER_IPV4,
+	DEVLINK_DPIPE_HEADER_IPV6,
 };
 
 #endif /* _UAPI_LINUX_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index cbc4b0461b0f..7d430c1d9c3e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -63,6 +63,23 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
 };
 EXPORT_SYMBOL(devlink_dpipe_header_ipv4);
 
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
+	{
+		.name = "destination ip",
+		.id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+		.bitwidth = 128,
+	},
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
+	.name = "ipv6",
+	.id = DEVLINK_DPIPE_HEADER_IPV6,
+	.fields = devlink_dpipe_fields_ipv6,
+	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
+	.global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 
 static LIST_HEAD(devlink_list);
-- 
cgit v1.2.3


From 65bce46298d064dff9db1282e17bb26602715819 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Fri, 1 Sep 2017 13:41:17 -0700
Subject: Bluetooth: make baswap src const

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h | 2 +-
 net/bluetooth/lib.c               | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 01487192f628..020142bb9735 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -233,7 +233,7 @@ static inline void bacpy(bdaddr_t *dst, const bdaddr_t *src)
 	memcpy(dst, src, sizeof(bdaddr_t));
 }
 
-void baswap(bdaddr_t *dst, bdaddr_t *src);
+void baswap(bdaddr_t *dst, const bdaddr_t *src);
 
 /* Common socket structures and functions */
 
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index aa4cf64e32a6..63e65d9b4b24 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -30,10 +30,10 @@
 
 #include <net/bluetooth/bluetooth.h>
 
-void baswap(bdaddr_t *dst, bdaddr_t *src)
+void baswap(bdaddr_t *dst, const bdaddr_t *src)
 {
-	unsigned char *d = (unsigned char *) dst;
-	unsigned char *s = (unsigned char *) src;
+	const unsigned char *s = (const unsigned char *)src;
+	unsigned char *d = (unsigned char *)dst;
 	unsigned int i;
 
 	for (i = 0; i < 6; i++)
-- 
cgit v1.2.3


From 864150dfa31dceab6ec5ca4579a2d35ede985cb7 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 1 Sep 2017 12:15:17 +0300
Subject: net: Add module reference to FIB notifiers

When a listener registers to the FIB notification chain it receives a
dump of the FIB entries and rules from existing address families by
invoking their dump operations.

While we call into these modules we need to make sure they aren't
removed. Do that by increasing their reference count before invoking
their dump operations and decrease it afterwards.

Fixes: 04b1d4e50e82 ("net: core: Make the FIB notification chain generic")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_notifier.h |  2 ++
 net/core/fib_notifier.c    | 13 +++++++++++--
 net/ipv4/fib_notifier.c    |  2 ++
 net/ipv6/fib6_notifier.c   |  2 ++
 4 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
index 241475224f74..669b9716dc7a 100644
--- a/include/net/fib_notifier.h
+++ b/include/net/fib_notifier.h
@@ -2,6 +2,7 @@
 #define __NET_FIB_NOTIFIER_H
 
 #include <linux/types.h>
+#include <linux/module.h>
 #include <linux/notifier.h>
 #include <net/net_namespace.h>
 
@@ -26,6 +27,7 @@ struct fib_notifier_ops {
 	struct list_head list;
 	unsigned int (*fib_seq_read)(struct net *net);
 	int (*fib_dump)(struct net *net, struct notifier_block *nb);
+	struct module *owner;
 	struct rcu_head rcu;
 };
 
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 292aab83702f..4fc202dbdfb6 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -2,6 +2,7 @@
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/init.h>
 #include <net/net_namespace.h>
 #include <net/fib_notifier.h>
@@ -33,8 +34,12 @@ static unsigned int fib_seq_sum(void)
 
 	rtnl_lock();
 	for_each_net(net) {
-		list_for_each_entry(ops, &net->fib_notifier_ops, list)
+		list_for_each_entry(ops, &net->fib_notifier_ops, list) {
+			if (!try_module_get(ops->owner))
+				continue;
 			fib_seq += ops->fib_seq_read(net);
+			module_put(ops->owner);
+		}
 	}
 	rtnl_unlock();
 
@@ -46,8 +51,12 @@ static int fib_net_dump(struct net *net, struct notifier_block *nb)
 	struct fib_notifier_ops *ops;
 
 	list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
-		int err = ops->fib_dump(net, nb);
+		int err;
 
+		if (!try_module_get(ops->owner))
+			continue;
+		err = ops->fib_dump(net, nb);
+		module_put(ops->owner);
 		if (err)
 			return err;
 	}
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index 5d7afb145562..cfd420b0572c 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -2,6 +2,7 @@
 #include <linux/notifier.h>
 #include <linux/socket.h>
 #include <linux/kernel.h>
+#include <linux/export.h>
 #include <net/net_namespace.h>
 #include <net/fib_notifier.h>
 #include <net/netns/ipv4.h>
@@ -49,6 +50,7 @@ static const struct fib_notifier_ops fib4_notifier_ops_template = {
 	.family		= AF_INET,
 	.fib_seq_read	= fib4_seq_read,
 	.fib_dump	= fib4_dump,
+	.owner		= THIS_MODULE,
 };
 
 int __net_init fib4_notifier_init(struct net *net)
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index 66a103ef7e86..05f82baaa99e 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -1,6 +1,7 @@
 #include <linux/notifier.h>
 #include <linux/socket.h>
 #include <linux/kernel.h>
+#include <linux/export.h>
 #include <net/net_namespace.h>
 #include <net/fib_notifier.h>
 #include <net/netns/ipv6.h>
@@ -41,6 +42,7 @@ static const struct fib_notifier_ops fib6_notifier_ops_template = {
 	.family		= AF_INET6,
 	.fib_seq_read	= fib6_seq_read,
 	.fib_dump	= fib6_dump,
+	.owner		= THIS_MODULE,
 };
 
 int __net_init fib6_notifier_init(struct net *net)
-- 
cgit v1.2.3


From 64327fc811268d4a24de03dac242ea29de6be75f Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 31 Aug 2017 18:11:41 +0200
Subject: ipv4: Don't override return code from ip_route_input_noref()

After ip_route_input() calls ip_route_input_noref(), another
check on skb_dst() is done, but if this fails, we shouldn't
override the return code from ip_route_input_noref(), as it
could have been more specific (i.e. -EHOSTUNREACH).

This also saves one call to skb_dst_force_safe() and one to
skb_dst() in case the ip_route_input_noref() check fails.

Reported-by: Sabrina Dubroca <sdubroca@redhat.com>
Fixes: 9df16efadd2a ("ipv4: call dst_hold_safe() properly")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Wei Wang <weiwan@google.com>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/route.h b/include/net/route.h
index cb0a76d9dde1..1b09a9368c68 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -189,10 +189,11 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 
 	rcu_read_lock();
 	err = ip_route_input_noref(skb, dst, src, tos, devin);
-	if (!err)
+	if (!err) {
 		skb_dst_force_safe(skb);
-	if (!skb_dst(skb))
-		err = -EINVAL;
+		if (!skb_dst(skb))
+			err = -EINVAL;
+	}
 	rcu_read_unlock();
 
 	return err;
-- 
cgit v1.2.3


From fb452a1aa3fd4034d7999e309c5466ff2d7005aa Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Fri, 1 Sep 2017 11:26:08 +0200
Subject: Revert "net: use lib/percpu_counter API for fragmentation mem
 accounting"

This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2.

There is a bug in fragmentation codes use of the percpu_counter API,
that can cause issues on systems with many CPUs.

The frag_mem_limit() just reads the global counter (fbc->count),
without considering other CPUs can have upto batch size (130K) that
haven't been subtracted yet.  Due to the 3MBytes lower thresh limit,
this become dangerous at >=24 CPUs (3*1024*1024/130000=24).

The correct API usage would be to use __percpu_counter_compare() which
does the right thing, and takes into account the number of (online)
CPUs and batch size, to account for this and call __percpu_counter_sum()
when needed.

We choose to revert the use of the lib/percpu_counter API for frag
memory accounting for several reasons:

1) On systems with CPUs > 24, the heavier fully locked
   __percpu_counter_sum() is always invoked, which will be more
   expensive than the atomic_t that is reverted to.

Given systems with more than 24 CPUs are becoming common this doesn't
seem like a good option.  To mitigate this, the batch size could be
decreased and thresh be increased.

2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX
   CPU, before SKBs are pushed into sockets on remote CPUs.  Given
   NICs can only hash on L2 part of the IP-header, the NIC-RXq's will
   likely be limited.  Thus, a fair chance that atomic add+dec happen
   on the same CPU.

Revert note that commit 1d6119baf061 ("net: fix percpu memory leaks")
removed init_frag_mem_limit() and instead use inet_frags_init_net().
After this revert, inet_frags_uninit_net() becomes empty.

Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting")
Fixes: 1d6119baf061 ("net: fix percpu memory leaks")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h  | 30 +++++++++---------------------
 net/ipv4/inet_fragment.c |  4 +---
 2 files changed, 10 insertions(+), 24 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 6fdcd2427776..fa635aa6d0b9 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -1,14 +1,9 @@
 #ifndef __NET_FRAG_H__
 #define __NET_FRAG_H__
 
-#include <linux/percpu_counter.h>
-
 struct netns_frags {
-	/* The percpu_counter "mem" need to be cacheline aligned.
-	 *  mem.count must not share cacheline with other writers
-	 */
-	struct percpu_counter   mem ____cacheline_aligned_in_smp;
-
+	/* Keep atomic mem on separate cachelines in structs that include it */
+	atomic_t		mem ____cacheline_aligned_in_smp;
 	/* sysctls */
 	int			timeout;
 	int			high_thresh;
@@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *);
 
 static inline int inet_frags_init_net(struct netns_frags *nf)
 {
-	return percpu_counter_init(&nf->mem, 0, GFP_KERNEL);
+	atomic_set(&nf->mem, 0);
+	return 0;
 }
 static inline void inet_frags_uninit_net(struct netns_frags *nf)
 {
-	percpu_counter_destroy(&nf->mem);
 }
 
 void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
@@ -140,31 +135,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q)
 
 /* Memory Tracking Functions. */
 
-/* The default percpu_counter batch size is not big enough to scale to
- * fragmentation mem acct sizes.
- * The mem size of a 64K fragment is approx:
- *  (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes
- */
-static unsigned int frag_percpu_counter_batch = 130000;
-
 static inline int frag_mem_limit(struct netns_frags *nf)
 {
-	return percpu_counter_read(&nf->mem);
+	return atomic_read(&nf->mem);
 }
 
 static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
 {
-	percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
+	atomic_sub(i, &nf->mem);
 }
 
 static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
 {
-	percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
+	atomic_add(i, &nf->mem);
 }
 
-static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
+static inline int sum_frag_mem_limit(struct netns_frags *nf)
 {
-	return percpu_counter_sum_positive(&nf->mem);
+	return atomic_read(&nf->mem);
 }
 
 /* RFC 3168 support :
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 96e95e83cc61..af74d0433453 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -234,10 +234,8 @@ evict_again:
 	cond_resched();
 
 	if (read_seqretry(&f->rnd_seqlock, seq) ||
-	    percpu_counter_sum(&nf->mem))
+	    sum_frag_mem_limit(nf))
 		goto evict_again;
-
-	percpu_counter_destroy(&nf->mem);
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
 
-- 
cgit v1.2.3


From 5a63643e583b6a9789d7a225ae076fb4e603991c Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Fri, 1 Sep 2017 11:26:13 +0200
Subject: Revert "net: fix percpu memory leaks"

This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb.

After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API
for fragmentation mem accounting") then here is no need for this
fix-up patch.  As percpu_counter is no longer used, it cannot
memory leak it any-longer.

Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting")
Fixes: 1d6119baf061 ("net: fix percpu memory leaks")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h                 |  7 +------
 net/ieee802154/6lowpan/reassembly.c     | 11 +++--------
 net/ipv4/ip_fragment.c                  | 12 +++---------
 net/ipv6/netfilter/nf_conntrack_reasm.c | 12 +++---------
 net/ipv6/reassembly.c                   | 12 +++---------
 5 files changed, 13 insertions(+), 41 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index fa635aa6d0b9..fc59e0775e00 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -103,15 +103,10 @@ struct inet_frags {
 int inet_frags_init(struct inet_frags *);
 void inet_frags_fini(struct inet_frags *);
 
-static inline int inet_frags_init_net(struct netns_frags *nf)
+static inline void inet_frags_init_net(struct netns_frags *nf)
 {
 	atomic_set(&nf->mem, 0);
-	return 0;
 }
-static inline void inet_frags_uninit_net(struct netns_frags *nf)
-{
-}
-
 void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
 
 void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 30d875dff6b5..f85b08baff16 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
 {
 	struct netns_ieee802154_lowpan *ieee802154_lowpan =
 		net_ieee802154_lowpan(net);
-	int res;
 
 	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
 
-	res = inet_frags_init_net(&ieee802154_lowpan->frags);
-	if (res)
-		return res;
-	res = lowpan_frags_ns_sysctl_register(net);
-	if (res)
-		inet_frags_uninit_net(&ieee802154_lowpan->frags);
-	return res;
+	inet_frags_init_net(&ieee802154_lowpan->frags);
+
+	return lowpan_frags_ns_sysctl_register(net);
 }
 
 static void __net_exit lowpan_frags_exit_net(struct net *net)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9a8cfac503dc..46408c220d9d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -844,8 +844,6 @@ static void __init ip4_frags_ctl_register(void)
 
 static int __net_init ipv4_frags_init_net(struct net *net)
 {
-	int res;
-
 	/* Fragment cache limits.
 	 *
 	 * The fragment memory accounting code, (tries to) account for
@@ -871,13 +869,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
 
 	net->ipv4.frags.max_dist = 64;
 
-	res = inet_frags_init_net(&net->ipv4.frags);
-	if (res)
-		return res;
-	res = ip4_frags_ns_ctl_register(net);
-	if (res)
-		inet_frags_uninit_net(&net->ipv4.frags);
-	return res;
+	inet_frags_init_net(&net->ipv4.frags);
+
+	return ip4_frags_ns_ctl_register(net);
 }
 
 static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 986d4ca38832..b263bf3a19f7 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -622,18 +622,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
 
 static int nf_ct_net_init(struct net *net)
 {
-	int res;
-
 	net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
-	res = inet_frags_init_net(&net->nf_frag.frags);
-	if (res)
-		return res;
-	res = nf_ct_frag6_sysctl_register(net);
-	if (res)
-		inet_frags_uninit_net(&net->nf_frag.frags);
-	return res;
+	inet_frags_init_net(&net->nf_frag.frags);
+
+	return nf_ct_frag6_sysctl_register(net);
 }
 
 static void nf_ct_net_exit(struct net *net)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e1da5b888cc4..846012eae526 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -714,19 +714,13 @@ static void ip6_frags_sysctl_unregister(void)
 
 static int __net_init ipv6_frags_init_net(struct net *net)
 {
-	int res;
-
 	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
 
-	res = inet_frags_init_net(&net->ipv6.frags);
-	if (res)
-		return res;
-	res = ip6_frags_ns_sysctl_register(net);
-	if (res)
-		inet_frags_uninit_net(&net->ipv6.frags);
-	return res;
+	inet_frags_init_net(&net->ipv6.frags);
+
+	return ip6_frags_ns_sysctl_register(net);
 }
 
 static void __net_exit ipv6_frags_exit_net(struct net *net)
-- 
cgit v1.2.3


From dfc46034b54af3abf594de75a1ee43ef2ec2a60a Mon Sep 17 00:00:00 2001
From: "Pablo M. Bermudo Garay" <pablombg@gmail.com>
Date: Wed, 23 Aug 2017 22:41:23 +0200
Subject: netfilter: nf_tables: add select_ops for stateful objects

This patch adds support for overloading stateful objects operations
through the select_ops() callback, just as it is implemented for
expressions.

This change is needed for upcoming additions to the stateful objects
infrastructure.

Signed-off-by: Pablo M. Bermudo Garay <pablombg@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 35 ++++++++++++++++++++++++-----------
 net/netfilter/nf_tables_api.c     | 36 ++++++++++++++++++++++++------------
 net/netfilter/nft_counter.c       | 20 +++++++++++++-------
 net/netfilter/nft_ct.c            | 18 ++++++++++++------
 net/netfilter/nft_objref.c        |  7 ++++---
 net/netfilter/nft_quota.c         | 20 +++++++++++++-------
 6 files changed, 90 insertions(+), 46 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f9795fe394f3..0f5b12a4ad09 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1007,12 +1007,12 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
  *
  *	@list: table stateful object list node
  *	@table: table this object belongs to
- *	@type: pointer to object type
- *	@data: pointer to object data
  *	@name: name of this stateful object
  *	@genmask: generation mask
  *	@use: number of references to this stateful object
  * 	@data: object data, layout depends on type
+ *	@ops: object operations
+ *	@data: pointer to object data
  */
 struct nft_object {
 	struct list_head		list;
@@ -1021,7 +1021,7 @@ struct nft_object {
 	u32				genmask:2,
 					use:30;
 	/* runtime data below here */
-	const struct nft_object_type	*type ____cacheline_aligned;
+	const struct nft_object_ops	*ops ____cacheline_aligned;
 	unsigned char			data[]
 		__attribute__((aligned(__alignof__(u64))));
 };
@@ -1044,27 +1044,39 @@ void nft_obj_notify(struct net *net, struct nft_table *table,
 /**
  *	struct nft_object_type - stateful object type
  *
- *	@eval: stateful object evaluation function
+ *	@select_ops: function to select nft_object_ops
+ *	@ops: default ops, used when no select_ops functions is present
  *	@list: list node in list of object types
  *	@type: stateful object numeric type
- *	@size: stateful object size
  *	@owner: module owner
  *	@maxattr: maximum netlink attribute
  *	@policy: netlink attribute policy
+ */
+struct nft_object_type {
+	const struct nft_object_ops	*(*select_ops)(const struct nft_ctx *,
+						       const struct nlattr * const tb[]);
+	const struct nft_object_ops	*ops;
+	struct list_head		list;
+	u32				type;
+	unsigned int                    maxattr;
+	struct module			*owner;
+	const struct nla_policy		*policy;
+};
+
+/**
+ *	struct nft_object_ops - stateful object operations
+ *
+ *	@eval: stateful object evaluation function
+ *	@size: stateful object size
  *	@init: initialize object from netlink attributes
  *	@destroy: release existing stateful object
  *	@dump: netlink dump stateful object
  */
-struct nft_object_type {
+struct nft_object_ops {
 	void				(*eval)(struct nft_object *obj,
 						struct nft_regs *regs,
 						const struct nft_pktinfo *pkt);
-	struct list_head		list;
-	u32				type;
 	unsigned int			size;
-	unsigned int			maxattr;
-	struct module			*owner;
-	const struct nla_policy		*policy;
 	int				(*init)(const struct nft_ctx *ctx,
 						const struct nlattr *const tb[],
 						struct nft_object *obj);
@@ -1072,6 +1084,7 @@ struct nft_object_type {
 	int				(*dump)(struct sk_buff *skb,
 						struct nft_object *obj,
 						bool reset);
+	const struct nft_object_type	*type;
 };
 
 int nft_register_obj(struct nft_object_type *obj_type);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 149785ff1c7b..b37c178897f3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4248,7 +4248,7 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
 
 	list_for_each_entry(obj, &table->objects, list) {
 		if (!nla_strcmp(nla, obj->name) &&
-		    objtype == obj->type->type &&
+		    objtype == obj->ops->type->type &&
 		    nft_active_genmask(obj, genmask))
 			return obj;
 	}
@@ -4270,6 +4270,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 				       const struct nlattr *attr)
 {
 	struct nlattr *tb[type->maxattr + 1];
+	const struct nft_object_ops *ops;
 	struct nft_object *obj;
 	int err;
 
@@ -4282,16 +4283,27 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 		memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
 	}
 
+	if (type->select_ops) {
+		ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
+		if (IS_ERR(ops)) {
+			err = PTR_ERR(ops);
+			goto err1;
+		}
+	} else {
+		ops = type->ops;
+	}
+
 	err = -ENOMEM;
-	obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL);
+	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
 	if (obj == NULL)
 		goto err1;
 
-	err = type->init(ctx, (const struct nlattr * const *)tb, obj);
+	err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
 	if (err < 0)
 		goto err2;
 
-	obj->type = type;
+	obj->ops = ops;
+
 	return obj;
 err2:
 	kfree(obj);
@@ -4307,7 +4319,7 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
 	nest = nla_nest_start(skb, attr);
 	if (!nest)
 		goto nla_put_failure;
-	if (obj->type->dump(skb, obj, reset) < 0)
+	if (obj->ops->dump(skb, obj, reset) < 0)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
 	return 0;
@@ -4418,8 +4430,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 err3:
 	kfree(obj->name);
 err2:
-	if (obj->type->destroy)
-		obj->type->destroy(obj);
+	if (obj->ops->destroy)
+		obj->ops->destroy(obj);
 	kfree(obj);
 err1:
 	module_put(type->owner);
@@ -4446,7 +4458,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
 
 	if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
 	    nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
-	    nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) ||
+	    nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
 	    nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
 	    nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
 		goto nla_put_failure;
@@ -4500,7 +4512,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 					goto cont;
 				if (filter &&
 				    filter->type != NFT_OBJECT_UNSPEC &&
-				    obj->type->type != filter->type)
+				    obj->ops->type->type != filter->type)
 					goto cont;
 
 				if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
@@ -4628,10 +4640,10 @@ err:
 
 static void nft_obj_destroy(struct nft_object *obj)
 {
-	if (obj->type->destroy)
-		obj->type->destroy(obj);
+	if (obj->ops->destroy)
+		obj->ops->destroy(obj);
 
-	module_put(obj->type->owner);
+	module_put(obj->ops->type->owner);
 	kfree(obj->name);
 	kfree(obj);
 }
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 67a710ebde09..eefe3b409925 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -175,15 +175,21 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
 	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
 };
 
-static struct nft_object_type nft_counter_obj __read_mostly = {
-	.type		= NFT_OBJECT_COUNTER,
+static struct nft_object_type nft_counter_obj_type;
+static const struct nft_object_ops nft_counter_obj_ops = {
+	.type		= &nft_counter_obj_type,
 	.size		= sizeof(struct nft_counter_percpu_priv),
-	.maxattr	= NFTA_COUNTER_MAX,
-	.policy		= nft_counter_policy,
 	.eval		= nft_counter_obj_eval,
 	.init		= nft_counter_obj_init,
 	.destroy	= nft_counter_obj_destroy,
 	.dump		= nft_counter_obj_dump,
+};
+
+static struct nft_object_type nft_counter_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_COUNTER,
+	.ops		= &nft_counter_obj_ops,
+	.maxattr	= NFTA_COUNTER_MAX,
+	.policy		= nft_counter_policy,
 	.owner		= THIS_MODULE,
 };
 
@@ -271,7 +277,7 @@ static int __init nft_counter_module_init(void)
 	for_each_possible_cpu(cpu)
 		seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
 
-	err = nft_register_obj(&nft_counter_obj);
+	err = nft_register_obj(&nft_counter_obj_type);
 	if (err < 0)
 		return err;
 
@@ -281,14 +287,14 @@ static int __init nft_counter_module_init(void)
 
 	return 0;
 err1:
-	nft_unregister_obj(&nft_counter_obj);
+	nft_unregister_obj(&nft_counter_obj_type);
 	return err;
 }
 
 static void __exit nft_counter_module_exit(void)
 {
 	nft_unregister_expr(&nft_counter_type);
-	nft_unregister_obj(&nft_counter_obj);
+	nft_unregister_obj(&nft_counter_obj_type);
 }
 
 module_init(nft_counter_module_init);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 1678e9e75e8e..bd0975d7dd6f 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -904,15 +904,21 @@ static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
 	[NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
 };
 
-static struct nft_object_type nft_ct_helper_obj __read_mostly = {
-	.type		= NFT_OBJECT_CT_HELPER,
+static struct nft_object_type nft_ct_helper_obj_type;
+static const struct nft_object_ops nft_ct_helper_obj_ops = {
+	.type		= &nft_ct_helper_obj_type,
 	.size		= sizeof(struct nft_ct_helper_obj),
-	.maxattr	= NFTA_CT_HELPER_MAX,
-	.policy		= nft_ct_helper_policy,
 	.eval		= nft_ct_helper_obj_eval,
 	.init		= nft_ct_helper_obj_init,
 	.destroy	= nft_ct_helper_obj_destroy,
 	.dump		= nft_ct_helper_obj_dump,
+};
+
+static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_CT_HELPER,
+	.ops		= &nft_ct_helper_obj_ops,
+	.maxattr	= NFTA_CT_HELPER_MAX,
+	.policy		= nft_ct_helper_policy,
 	.owner		= THIS_MODULE,
 };
 
@@ -930,7 +936,7 @@ static int __init nft_ct_module_init(void)
 	if (err < 0)
 		goto err1;
 
-	err = nft_register_obj(&nft_ct_helper_obj);
+	err = nft_register_obj(&nft_ct_helper_obj_type);
 	if (err < 0)
 		goto err2;
 
@@ -945,7 +951,7 @@ err1:
 
 static void __exit nft_ct_module_exit(void)
 {
-	nft_unregister_obj(&nft_ct_helper_obj);
+	nft_unregister_obj(&nft_ct_helper_obj_type);
 	nft_unregister_expr(&nft_notrack_type);
 	nft_unregister_expr(&nft_ct_type);
 }
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1dd428fbaaa3..7bcdc48f3d73 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -22,7 +22,7 @@ static void nft_objref_eval(const struct nft_expr *expr,
 {
 	struct nft_object *obj = nft_objref_priv(expr);
 
-	obj->type->eval(obj, regs, pkt);
+	obj->ops->eval(obj, regs, pkt);
 }
 
 static int nft_objref_init(const struct nft_ctx *ctx,
@@ -54,7 +54,8 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	const struct nft_object *obj = nft_objref_priv(expr);
 
 	if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
-	    nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
+	    nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
+			 htonl(obj->ops->type->type)))
 		goto nla_put_failure;
 
 	return 0;
@@ -104,7 +105,7 @@ static void nft_objref_map_eval(const struct nft_expr *expr,
 		return;
 	}
 	obj = *nft_set_ext_obj(ext);
-	obj->type->eval(obj, regs, pkt);
+	obj->ops->eval(obj, regs, pkt);
 }
 
 static int nft_objref_map_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 25e33159be57..0ed124a93fcf 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -151,14 +151,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
 	return nft_quota_do_dump(skb, priv, reset);
 }
 
-static struct nft_object_type nft_quota_obj __read_mostly = {
-	.type		= NFT_OBJECT_QUOTA,
+static struct nft_object_type nft_quota_obj_type;
+static const struct nft_object_ops nft_quota_obj_ops = {
+	.type		= &nft_quota_obj_type,
 	.size		= sizeof(struct nft_quota),
-	.maxattr	= NFTA_QUOTA_MAX,
-	.policy		= nft_quota_policy,
 	.init		= nft_quota_obj_init,
 	.eval		= nft_quota_obj_eval,
 	.dump		= nft_quota_obj_dump,
+};
+
+static struct nft_object_type nft_quota_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_QUOTA,
+	.ops		= &nft_quota_obj_ops,
+	.maxattr	= NFTA_QUOTA_MAX,
+	.policy		= nft_quota_policy,
 	.owner		= THIS_MODULE,
 };
 
@@ -209,7 +215,7 @@ static int __init nft_quota_module_init(void)
 {
 	int err;
 
-	err = nft_register_obj(&nft_quota_obj);
+	err = nft_register_obj(&nft_quota_obj_type);
 	if (err < 0)
 		return err;
 
@@ -219,14 +225,14 @@ static int __init nft_quota_module_init(void)
 
 	return 0;
 err1:
-	nft_unregister_obj(&nft_quota_obj);
+	nft_unregister_obj(&nft_quota_obj_type);
 	return err;
 }
 
 static void __exit nft_quota_module_exit(void)
 {
 	nft_unregister_expr(&nft_quota_type);
-	nft_unregister_obj(&nft_quota_obj);
+	nft_unregister_obj(&nft_quota_obj_type);
 }
 
 module_init(nft_quota_module_init);
-- 
cgit v1.2.3


From d1c1e39de8357d66163da39e893e38ea1410e8f8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 29 Aug 2017 12:04:10 +0200
Subject: netfilter: remove unused hooknum arg from packet functions

tested with allmodconfig build.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_conntrack_l4proto.h   | 1 -
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   | 1 -
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 6 ++----
 net/netfilter/nf_conntrack_core.c              | 2 +-
 net/netfilter/nf_conntrack_proto_dccp.c        | 2 +-
 net/netfilter/nf_conntrack_proto_generic.c     | 1 -
 net/netfilter/nf_conntrack_proto_gre.c         | 1 -
 net/netfilter/nf_conntrack_proto_sctp.c        | 1 -
 net/netfilter/nf_conntrack_proto_tcp.c         | 1 -
 net/netfilter/nf_conntrack_proto_udp.c         | 1 -
 10 files changed, 4 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index d4933d56809d..738a0307a96b 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -43,7 +43,6 @@ struct nf_conntrack_l4proto {
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
 		      u_int8_t pf,
-		      unsigned int hooknum,
 		      unsigned int *timeouts);
 
 	/* Called when a new connection for this protocol found;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 434b4e20f6db..ce108a996316 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -82,7 +82,6 @@ static int icmp_packet(struct nf_conn *ct,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
 		       u_int8_t pf,
-		       unsigned int hooknum,
 		       unsigned int *timeout)
 {
 	/* Do not immediately delete the connection after the first
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 43544b975eae..30e34c4de003 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -95,7 +95,6 @@ static int icmpv6_packet(struct nf_conn *ct,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
 		       u_int8_t pf,
-		       unsigned int hooknum,
 		       unsigned int *timeout)
 {
 	/* Do not immediately delete the connection after the first
@@ -129,8 +128,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
 static int
 icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 		     struct sk_buff *skb,
-		     unsigned int icmp6off,
-		     unsigned int hooknum)
+		     unsigned int icmp6off)
 {
 	struct nf_conntrack_tuple intuple, origtuple;
 	const struct nf_conntrack_tuple_hash *h;
@@ -214,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	if (icmp6h->icmp6_type >= 128)
 		return NF_ACCEPT;
 
-	return icmpv6_error_message(net, tmpl, skb, dataoff, hooknum);
+	return icmpv6_error_message(net, tmpl, skb, dataoff);
 }
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c23df7c9cd59..ee5555dd7ebc 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1414,7 +1414,7 @@ repeat:
 	/* Decide what timeout policy we want to apply to this flow. */
 	timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
 
-	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
+	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts);
 	if (ret <= 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 188347571fc7..0f5a4d79f6b8 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -469,7 +469,7 @@ static unsigned int *dccp_get_timeouts(struct net *net)
 
 static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		       unsigned int dataoff, enum ip_conntrack_info ctinfo,
-		       u_int8_t pf, unsigned int hooknum,
+		       u_int8_t pf,
 		       unsigned int *timeouts)
 {
 	struct net *net = nf_ct_net(ct);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 2993995b690d..9cd40700842e 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -61,7 +61,6 @@ static int generic_packet(struct nf_conn *ct,
 			  unsigned int dataoff,
 			  enum ip_conntrack_info ctinfo,
 			  u_int8_t pf,
-			  unsigned int hooknum,
 			  unsigned int *timeout)
 {
 	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index c0e3a23ac23a..09a90484c27d 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -245,7 +245,6 @@ static int gre_packet(struct nf_conn *ct,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
 		      u_int8_t pf,
-		      unsigned int hooknum,
 		      unsigned int *timeouts)
 {
 	/* If we've seen traffic both ways, this is a GRE connection.
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 890b5c73368d..6303a88af12b 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -307,7 +307,6 @@ static int sctp_packet(struct nf_conn *ct,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
 		       u_int8_t pf,
-		       unsigned int hooknum,
 		       unsigned int *timeouts)
 {
 	enum sctp_conntrack new_state, old_state;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 33c52d9ab2f5..cba1c6ffe51a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -803,7 +803,6 @@ static int tcp_packet(struct nf_conn *ct,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
 		      u_int8_t pf,
-		      unsigned int hooknum,
 		      unsigned int *timeouts)
 {
 	struct net *net = nf_ct_net(ct);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index dcf3030d2226..8af734cd1a94 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -74,7 +74,6 @@ static int udp_packet(struct nf_conn *ct,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
 		      u_int8_t pf,
-		      unsigned int hooknum,
 		      unsigned int *timeouts)
 {
 	/* If we've seen traffic both ways, this is some kind of UDP
-- 
cgit v1.2.3


From 44d6e2f27328b254111dd716fde45b3b59b8a4f7 Mon Sep 17 00:00:00 2001
From: Varsha Rao <rvarsha016@gmail.com>
Date: Wed, 30 Aug 2017 13:37:11 +0530
Subject: net: Replace NF_CT_ASSERT() with WARN_ON().

This patch removes NF_CT_ASSERT() and instead uses WARN_ON().

Signed-off-by: Varsha Rao <rvarsha016@gmail.com>
---
 include/net/netfilter/nf_conntrack.h           |  2 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  2 +-
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c       |  6 +++---
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c    |  8 ++++----
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  2 +-
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c       |  6 +++---
 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c    |  4 ++--
 net/netfilter/nf_conntrack_core.c              | 11 +++++------
 net/netfilter/nf_conntrack_expect.c            |  4 ++--
 net/netfilter/nf_conntrack_extend.c            |  2 +-
 net/netfilter/nf_conntrack_standalone.c        |  6 +++---
 net/netfilter/nf_nat_core.c                    |  4 ++--
 net/netfilter/nf_nat_redirect.c                |  6 +++---
 net/netfilter/xt_NETMAP.c                      |  8 ++++----
 net/netfilter/xt_nat.c                         | 20 ++++++++++----------
 15 files changed, 45 insertions(+), 46 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 6e6f678aaac7..0385cb08c478 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -159,7 +159,7 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
 /* decrement reference count on a conntrack */
 static inline void nf_ct_put(struct nf_conn *ct)
 {
-	NF_CT_ASSERT(ct);
+	WARN_ON(!ct);
 	nf_conntrack_put(&ct->ct_general);
 }
 
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index ce108a996316..a046c298413a 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -126,7 +126,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conntrack_zone tmp;
 
-	NF_CT_ASSERT(!skb_nfct(skb));
+	WARN_ON(skb_nfct(skb));
 	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
 
 	/* Are they talking about one of our connections? */
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index feedd759ca80..a0f37b208268 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -190,7 +190,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
 	struct nf_conntrack_tuple target;
 	unsigned long statusbit;
 
-	NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
 
 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -306,8 +306,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
 
 	default:
 		/* ESTABLISHED */
-		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
-			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+		WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+			ctinfo != IP_CT_ESTABLISHED_REPLY);
 		if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
 			goto oif_changed;
 	}
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index f39037fca923..0c366aad89cb 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -34,12 +34,12 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	const struct rtable *rt;
 	__be32 newsrc, nh;
 
-	NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
+	WARN_ON(hooknum != NF_INET_POST_ROUTING);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
-	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			    ctinfo == IP_CT_RELATED_REPLY));
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
 
 	/* Source address is 0.0.0.0 - locally generated packet that is
 	 * probably not supposed to be masqueraded.
@@ -96,7 +96,7 @@ static int masq_device_event(struct notifier_block *this,
 		 * conntracks which were associated with that device,
 		 * and forget them.
 		 */
-		NF_CT_ASSERT(dev->ifindex != 0);
+		WARN_ON(dev->ifindex == 0);
 
 		nf_ct_iterate_cleanup_net(net, device_cmp,
 					  (void *)(long)dev->ifindex, 0, 0);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 30e34c4de003..a9e1fd1a8536 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -136,7 +136,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conntrack_zone tmp;
 
-	NF_CT_ASSERT(!skb_nfct(skb));
+	WARN_ON(skb_nfct(skb));
 
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuplepr(skb,
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index b2b4f031b3a1..46d6dba50698 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -196,7 +196,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
 	struct nf_conntrack_tuple target;
 	unsigned long statusbit;
 
-	NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
 
 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -319,8 +319,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
 
 	default:
 		/* ESTABLISHED */
-		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
-			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+		WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+			ctinfo != IP_CT_ESTABLISHED_REPLY);
 		if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
 			goto oif_changed;
 	}
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index d7b679037bae..98f61fcb9108 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -36,8 +36,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
 	struct nf_nat_range newrange;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			    ctinfo == IP_CT_RELATED_REPLY));
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
 
 	if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
 			       &ipv6_hdr(skb)->daddr, 0, &src) < 0)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ee5555dd7ebc..99c753c485ee 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -403,7 +403,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	const struct nf_conntrack_l4proto *l4proto;
 
 	pr_debug("destroy_conntrack(%p)\n", ct);
-	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
+	WARN_ON(atomic_read(&nfct->use) != 0);
 
 	if (unlikely(nf_ct_is_template(ct))) {
 		nf_ct_tmpl_free(ct);
@@ -756,12 +756,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	 * connections for unconfirmed conns.  But packet copies and
 	 * REJECT will give spurious warnings here.
 	 */
-	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
 
 	/* No external references means no one else could have
 	 * confirmed us.
 	 */
-	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+	WARN_ON(nf_ct_is_confirmed(ct));
 	pr_debug("Confirming conntrack %p\n", ct);
 	/* We have to check the DYING flag after unlink to prevent
 	 * a race against nf_ct_get_next_corpse() possibly called from
@@ -1160,7 +1159,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 	/* A freed object has refcnt == 0, that's
 	 * the golden rule for SLAB_TYPESAFE_BY_RCU
 	 */
-	NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+	WARN_ON(atomic_read(&ct->ct_general.use) != 0);
 
 	nf_ct_ext_destroy(ct);
 	nf_ct_ext_free(ct);
@@ -1468,7 +1467,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 	struct nf_conn_help *help = nfct_help(ct);
 
 	/* Should be unconfirmed, so not in hash table yet */
-	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+	WARN_ON(nf_ct_is_confirmed(ct));
 
 	pr_debug("Altering reply tuple of %p to ", ct);
 	nf_ct_dump_tuple(newreply);
@@ -1490,7 +1489,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 			  unsigned long extra_jiffies,
 			  int do_acct)
 {
-	NF_CT_ASSERT(skb);
+	WARN_ON(!skb);
 
 	/* Only update if this is not a fixed timeout */
 	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index dad2c0c22ad5..64778f9a8548 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,8 +51,8 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 	struct nf_conn_help *master_help = nfct_help(exp->master);
 	struct net *net = nf_ct_exp_net(exp);
 
-	NF_CT_ASSERT(master_help);
-	NF_CT_ASSERT(!timer_pending(&exp->timeout));
+	WARN_ON(!master_help);
+	WARN_ON(timer_pending(&exp->timeout));
 
 	hlist_del_rcu(&exp->hnode);
 	net->ct.expect_count--;
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 6c605e88ebae..9fe0ddc333fb 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -47,7 +47,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 	struct nf_ct_ext_type *t;
 
 	/* Conntrack must not be confirmed to avoid races on reallocation. */
-	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+	WARN_ON(nf_ct_is_confirmed(ct));
 
 	old = ct->ext;
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9eb85858d764..5a101caa3e12 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -287,7 +287,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	struct net *net = seq_file_net(s);
 	int ret = 0;
 
-	NF_CT_ASSERT(ct);
+	WARN_ON(!ct);
 	if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
 		return 0;
 
@@ -304,9 +304,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		goto release;
 
 	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-	NF_CT_ASSERT(l3proto);
+	WARN_ON(!l3proto);
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-	NF_CT_ASSERT(l4proto);
+	WARN_ON(!l4proto);
 
 	ret = -ENOSPC;
 	seq_printf(s, "%-8s %u %-8s %u %ld ",
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b1d3740ae36a..40573aa6c133 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -414,8 +414,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 	if (nf_ct_is_confirmed(ct))
 		return NF_ACCEPT;
 
-	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
-		     maniptype == NF_NAT_MANIP_DST);
+	WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
+		maniptype != NF_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
 
 	/* What we've got will look like inverse of reply. Normally
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 86067560a318..25b06b959118 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -38,11 +38,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
 	__be32 newdst;
 	struct nf_nat_range newrange;
 
-	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
-		     hooknum == NF_INET_LOCAL_OUT);
+	WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
+		hooknum != NF_INET_LOCAL_OUT);
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
 
 	/* Local packets: make them go to loopback */
 	if (hooknum == NF_INET_LOCAL_OUT) {
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index e45a01255e70..58aa9dd3c5b7 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -77,10 +77,10 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 	struct nf_nat_range newrange;
 
-	NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
-		     xt_hooknum(par) == NF_INET_POST_ROUTING ||
-		     xt_hooknum(par) == NF_INET_LOCAL_OUT ||
-		     xt_hooknum(par) == NF_INET_LOCAL_IN);
+	WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
+		xt_hooknum(par) != NF_INET_POST_ROUTING &&
+		xt_hooknum(par) != NF_INET_LOCAL_OUT &&
+		xt_hooknum(par) != NF_INET_LOCAL_IN);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index 8107b3eb865f..0fd14d1eb09d 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -58,9 +58,9 @@ xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct != NULL &&
-		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-		      ctinfo == IP_CT_RELATED_REPLY));
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+		  ctinfo == IP_CT_RELATED_REPLY)));
 
 	xt_nat_convert_range(&range, &mr->range[0]);
 	return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
@@ -75,8 +75,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct != NULL &&
-		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
 
 	xt_nat_convert_range(&range, &mr->range[0]);
 	return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
@@ -90,9 +90,9 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct != NULL &&
-		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-		      ctinfo == IP_CT_RELATED_REPLY));
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+		  ctinfo == IP_CT_RELATED_REPLY)));
 
 	return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
 }
@@ -105,8 +105,8 @@ xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct != NULL &&
-		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
 
 	return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST);
 }
-- 
cgit v1.2.3


From 9efdb14f76f4d7591cd4d7a436ebd716b19703b6 Mon Sep 17 00:00:00 2001
From: Varsha Rao <rvarsha016@gmail.com>
Date: Wed, 30 Aug 2017 13:37:12 +0530
Subject: net: Remove CONFIG_NETFILTER_DEBUG and _ASSERT() macros.

This patch removes CONFIG_NETFILTER_DEBUG and _ASSERT() macros as they
are no longer required. Replace _ASSERT() macros with WARN_ON().

Signed-off-by: Varsha Rao <rvarsha016@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 arch/parisc/configs/c3000_defconfig  |  1 -
 arch/sh/configs/se7751_defconfig     |  1 -
 include/net/netfilter/nf_conntrack.h |  6 ------
 net/Kconfig                          |  7 -------
 net/bridge/netfilter/ebtables.c      | 20 ++++++++------------
 net/ipv4/netfilter/ip_tables.c       | 12 +++---------
 net/ipv6/netfilter/ip6_tables.c      | 12 +++---------
 7 files changed, 14 insertions(+), 45 deletions(-)

(limited to 'include/net')

diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig
index 0764d3971cf6..8d41a73bd71b 100644
--- a/arch/parisc/configs/c3000_defconfig
+++ b/arch/parisc/configs/c3000_defconfig
@@ -31,7 +31,6 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_INET6_IPCOMP=m
 CONFIG_IPV6_TUNNEL=m
 CONFIG_NETFILTER=y
-CONFIG_NETFILTER_DEBUG=y
 CONFIG_NET_PKTGEN=m
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig
index 75c92fc1876b..56b5e4ce8d4a 100644
--- a/arch/sh/configs/se7751_defconfig
+++ b/arch/sh/configs/se7751_defconfig
@@ -28,7 +28,6 @@ CONFIG_IP_PNP_RARP=y
 # CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
-CONFIG_NETFILTER_DEBUG=y
 CONFIG_IP_NF_QUEUE=y
 CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 0385cb08c478..fdc9c64a1c94 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -44,12 +44,6 @@ union nf_conntrack_expect_proto {
 #include <linux/types.h>
 #include <linux/skbuff.h>
 
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NF_CT_ASSERT(x)		WARN_ON(!(x))
-#else
-#define NF_CT_ASSERT(x)
-#endif
-
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
diff --git a/net/Kconfig b/net/Kconfig
index e0e7c62c88f0..9dba2715919d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -166,13 +166,6 @@ menuconfig NETFILTER
 
 if NETFILTER
 
-config NETFILTER_DEBUG
-	bool "Network packet filtering debugging"
-	depends on NETFILTER
-	help
-	  You can say Y here if you want to get additional messages useful in
-	  debugging the netfilter code.
-
 config NETFILTER_ADVANCED
 	bool "Advanced netfilter configuration"
 	depends on NETFILTER
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 54c7ef4e970e..83951f978445 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -252,13 +252,11 @@ unsigned int ebt_do_table(struct sk_buff *skb,
 		}
 		if (verdict == EBT_RETURN) {
 letsreturn:
-#ifdef CONFIG_NETFILTER_DEBUG
-			if (sp == 0) {
-				BUGPRINT("RETURN on base chain");
+			if (WARN(sp == 0, "RETURN on base chain")) {
 				/* act like this is EBT_CONTINUE */
 				goto letscontinue;
 			}
-#endif
+
 			sp--;
 			/* put all the local variables right */
 			i = cs[sp].n;
@@ -271,26 +269,24 @@ letsreturn:
 		}
 		if (verdict == EBT_CONTINUE)
 			goto letscontinue;
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (verdict < 0) {
-			BUGPRINT("bogus standard verdict\n");
+
+		if (WARN(verdict < 0, "bogus standard verdict\n")) {
 			read_unlock_bh(&table->lock);
 			return NF_DROP;
 		}
-#endif
+
 		/* jump to a udc */
 		cs[sp].n = i + 1;
 		cs[sp].chaininfo = chaininfo;
 		cs[sp].e = ebt_next_entry(point);
 		i = 0;
 		chaininfo = (struct ebt_entries *) (base + verdict);
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (chaininfo->distinguisher) {
-			BUGPRINT("jump to non-chain\n");
+
+		if (WARN(chaininfo->distinguisher, "jump to non-chain\n")) {
 			read_unlock_bh(&table->lock);
 			return NF_DROP;
 		}
-#endif
+
 		nentries = chaininfo->nentries;
 		point = (struct ebt_entry *)chaininfo->data;
 		counter_base = cb_base + chaininfo->counter_offset;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ce1d97579ce8..576cba2b57e9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -35,12 +35,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv4 packet filter");
 
-#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x)		WARN_ON(!(x))
-#else
-#define IP_NF_ASSERT(x)
-#endif
-
 void *ipt_alloc_initial_table(const struct xt_table *info)
 {
 	return xt_alloc_initial_table(ipt, IPT);
@@ -263,7 +257,7 @@ ipt_do_table(struct sk_buff *skb,
 	acpar.hotdrop = false;
 	acpar.state   = state;
 
-	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	WARN_ON(!(table->valid_hooks & (1 << hook)));
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
 	private = table->private;
@@ -293,7 +287,7 @@ ipt_do_table(struct sk_buff *skb,
 		const struct xt_entry_match *ematch;
 		struct xt_counters *counter;
 
-		IP_NF_ASSERT(e);
+		WARN_ON(!e);
 		if (!ip_packet_match(ip, indev, outdev,
 		    &e->ip, acpar.fragoff)) {
  no_match:
@@ -312,7 +306,7 @@ ipt_do_table(struct sk_buff *skb,
 		ADD_COUNTER(*counter, skb->len, 1);
 
 		t = ipt_get_target(e);
-		IP_NF_ASSERT(t->u.kernel.target);
+		WARN_ON(!t->u.kernel.target);
 
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 		/* The packet is traced: log it */
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 9f6644958e5e..54b1e75eded1 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -39,12 +39,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv6 packet filter");
 
-#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x)	WARN_ON(!(x))
-#else
-#define IP_NF_ASSERT(x)
-#endif
-
 void *ip6t_alloc_initial_table(const struct xt_table *info)
 {
 	return xt_alloc_initial_table(ip6t, IP6T);
@@ -284,7 +278,7 @@ ip6t_do_table(struct sk_buff *skb,
 	acpar.hotdrop = false;
 	acpar.state   = state;
 
-	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	WARN_ON(!(table->valid_hooks & (1 << hook)));
 
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
@@ -315,7 +309,7 @@ ip6t_do_table(struct sk_buff *skb,
 		const struct xt_entry_match *ematch;
 		struct xt_counters *counter;
 
-		IP_NF_ASSERT(e);
+		WARN_ON(!e);
 		acpar.thoff = 0;
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
 		    &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
@@ -335,7 +329,7 @@ ip6t_do_table(struct sk_buff *skb,
 		ADD_COUNTER(*counter, skb->len, 1);
 
 		t = ip6t_get_target_c(e);
-		IP_NF_ASSERT(t->u.kernel.target);
+		WARN_ON(!t->u.kernel.target);
 
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 		/* The packet is traced: log it */
-- 
cgit v1.2.3


From 53168215909281a09d3afc6fb51a9d4f81f74d39 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 22 Jun 2017 12:20:30 +0200
Subject: mac80211: fix VLAN handling with TXQs

With TXQs, the AP_VLAN interfaces are resolved to their owner AP
interface when enqueuing the frame, which makes sense since the
frame really goes out on that as far as the driver is concerned.

However, this introduces a problem: frames to be encrypted with
a VLAN-specific GTK will now be encrypted with the AP GTK, since
the information about which virtual interface to use to select
the key is taken from the TXQ.

Fix this by preserving info->control.vif and using that in the
dequeue function. This now requires doing the driver-mapping
in the dequeue as well.

Since there's no way to filter the frames that are sitting on a
TXQ, drop all frames, which may affect other interfaces, when an
AP_VLAN is removed.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 15 ++-------------
 net/mac80211/iface.c   | 17 +++++++++++++++--
 net/mac80211/tx.c      | 36 +++++++++++++++++++++++++++++-------
 3 files changed, 46 insertions(+), 22 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f8149ca192b4..885690fa39c8 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -919,21 +919,10 @@ struct ieee80211_tx_info {
 				unsigned long jiffies;
 			};
 			/* NB: vif can be NULL for injected frames */
-			union {
-				/* NB: vif can be NULL for injected frames */
-				struct ieee80211_vif *vif;
-
-				/* When packets are enqueued on txq it's easy
-				 * to re-construct the vif pointer. There's no
-				 * more space in tx_info so it can be used to
-				 * store the necessary enqueue time for packet
-				 * sojourn time computation.
-				 */
-				codel_time_t enqueue_time;
-			};
+			struct ieee80211_vif *vif;
 			struct ieee80211_key_conf *hw_key;
 			u32 flags;
-			/* 4 bytes free */
+			codel_time_t enqueue_time;
 		} control;
 		struct {
 			u64 cookie;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 9228ac73c429..44399322f356 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -792,6 +792,7 @@ static int ieee80211_open(struct net_device *dev)
 static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 			      bool going_down)
 {
+	struct ieee80211_sub_if_data *txq_sdata = sdata;
 	struct ieee80211_local *local = sdata->local;
 	struct fq *fq = &local->fq;
 	unsigned long flags;
@@ -937,6 +938,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP_VLAN:
+		txq_sdata = container_of(sdata->bss,
+					 struct ieee80211_sub_if_data, u.ap);
+
 		mutex_lock(&local->mtx);
 		list_del(&sdata->u.vlan.list);
 		mutex_unlock(&local->mtx);
@@ -1007,8 +1011,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	}
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 
-	if (sdata->vif.txq) {
-		struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+	if (txq_sdata->vif.txq) {
+		struct txq_info *txqi = to_txq_info(txq_sdata->vif.txq);
+
+		/*
+		 * FIXME FIXME
+		 *
+		 * We really shouldn't purge the *entire* txqi since that
+		 * contains frames for the other AP_VLANs (and possibly
+		 * the AP itself) as well, but there's no API in FQ now
+		 * to be able to filter.
+		 */
 
 		spin_lock_bh(&fq->lock);
 		ieee80211_txq_purge(local, txqi);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8858f4f185e9..94826680cf2b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1276,11 +1276,6 @@ static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
 	IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
 }
 
-static void ieee80211_set_skb_vif(struct sk_buff *skb, struct txq_info *txqi)
-{
-	IEEE80211_SKB_CB(skb)->control.vif = txqi->txq.vif;
-}
-
 static u32 codel_skb_len_func(const struct sk_buff *skb)
 {
 	return skb->len;
@@ -3414,6 +3409,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 	struct ieee80211_tx_info *info;
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result r;
+	struct ieee80211_vif *vif;
 
 	spin_lock_bh(&fq->lock);
 
@@ -3430,8 +3426,6 @@ begin:
 	if (!skb)
 		goto out;
 
-	ieee80211_set_skb_vif(skb, txqi);
-
 	hdr = (struct ieee80211_hdr *)skb->data;
 	info = IEEE80211_SKB_CB(skb);
 
@@ -3488,6 +3482,34 @@ begin:
 		}
 	}
 
+	switch (tx.sdata->vif.type) {
+	case NL80211_IFTYPE_MONITOR:
+		if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+			vif = &tx.sdata->vif;
+			break;
+		}
+		tx.sdata = rcu_dereference(local->monitor_sdata);
+		if (tx.sdata) {
+			vif = &tx.sdata->vif;
+			info->hw_queue =
+				vif->hw_queue[skb_get_queue_mapping(skb)];
+		} else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
+			ieee80211_free_txskb(&local->hw, skb);
+			goto begin;
+		} else {
+			vif = NULL;
+		}
+		break;
+	case NL80211_IFTYPE_AP_VLAN:
+		tx.sdata = container_of(tx.sdata->bss,
+					struct ieee80211_sub_if_data, u.ap);
+		/* fall through */
+	default:
+		vif = &tx.sdata->vif;
+		break;
+	}
+
+	IEEE80211_SKB_CB(skb)->control.vif = vif;
 out:
 	spin_unlock_bh(&fq->lock);
 
-- 
cgit v1.2.3


From fd0c88b70060e03a2215259ed29b4b31497ad205 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 5 Sep 2017 10:05:47 +0200
Subject: net/ncsi: fix ncsi_vlan_rx_{add,kill}_vid references

We get a new link error in allmodconfig kernels after ftgmac100
started using the ncsi helpers:

ERROR: "ncsi_vlan_rx_kill_vid" [drivers/net/ethernet/faraday/ftgmac100.ko] undefined!
ERROR: "ncsi_vlan_rx_add_vid" [drivers/net/ethernet/faraday/ftgmac100.ko] undefined!

Related to that, we get another error when CONFIG_NET_NCSI is disabled:

drivers/net/ethernet/faraday/ftgmac100.c:1626:25: error: 'ncsi_vlan_rx_add_vid' undeclared here (not in a function); did you mean 'ncsi_start_dev'?
drivers/net/ethernet/faraday/ftgmac100.c:1627:26: error: 'ncsi_vlan_rx_kill_vid' undeclared here (not in a function); did you mean 'ncsi_vlan_rx_add_vid'?

This fixes both problems at once, using a 'static inline' stub helper
for the disabled case, and exporting the functions when they are present.

Fixes: 51564585d8c6 ("ftgmac100: Support NCSI VLAN filtering when available")
Fixes: 21acf63013ed ("net/ncsi: Configure VLAN tag filter")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ncsi.h     | 10 ++++++++++
 net/ncsi/ncsi-manage.c |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ncsi.h b/include/net/ncsi.h
index 1f96af46df49..fdc60ff2511d 100644
--- a/include/net/ncsi.h
+++ b/include/net/ncsi.h
@@ -36,6 +36,16 @@ int ncsi_start_dev(struct ncsi_dev *nd);
 void ncsi_stop_dev(struct ncsi_dev *nd);
 void ncsi_unregister_dev(struct ncsi_dev *nd);
 #else /* !CONFIG_NET_NCSI */
+static inline int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	return -EINVAL;
+}
+
+static inline int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	return -EINVAL;
+}
+
 static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 					void (*notifier)(struct ncsi_dev *nd))
 {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 11904b3b702d..3fd3c39e6278 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1453,6 +1453,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 
 	return found ? ncsi_process_next_channel(ndp) : 0;
 }
+EXPORT_SYMBOL_GPL(ncsi_vlan_rx_add_vid);
 
 int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
@@ -1491,6 +1492,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 
 	return found ? ncsi_process_next_channel(ndp) : 0;
 }
+EXPORT_SYMBOL_GPL(ncsi_vlan_rx_kill_vid);
 
 struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 				   void (*handler)(struct ncsi_dev *ndev))
-- 
cgit v1.2.3


From 3a1214e8b06317b4e71cd3a36344df87b7858e19 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@quantonium.net>
Date: Fri, 1 Sep 2017 14:04:11 -0700
Subject: flow_dissector: Cleanup control flow

__skb_flow_dissect is riddled with gotos that make discerning the flow,
debugging, and extending the capability difficult. This patch
reorganizes things so that we only perform goto's after the two main
switch statements (no gotos within the cases now). It also eliminates
several goto labels so that there are only two labels that can be target
for goto.

Reported-by: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Tom Herbert <tom@quantonium.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h |   8 ++
 net/core/flow_dissector.c    | 223 ++++++++++++++++++++++++++++---------------
 2 files changed, 153 insertions(+), 78 deletions(-)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index e2663e900b0a..fc3dce730a6b 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -19,6 +19,14 @@ struct flow_dissector_key_control {
 #define FLOW_DIS_FIRST_FRAG	BIT(1)
 #define FLOW_DIS_ENCAPSULATION	BIT(2)
 
+enum flow_dissect_ret {
+	FLOW_DISSECT_RET_OUT_GOOD,
+	FLOW_DISSECT_RET_OUT_BAD,
+	FLOW_DISSECT_RET_PROTO_AGAIN,
+	FLOW_DISSECT_RET_IPPROTO_AGAIN,
+	FLOW_DISSECT_RET_CONTINUE,
+};
+
 /**
  * struct flow_dissector_key_basic:
  * @thoff: Transport header offset
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index e2eaa1ff948d..e0ea17d1c7fc 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -115,12 +115,6 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
 }
 EXPORT_SYMBOL(__skb_flow_get_ports);
 
-enum flow_dissect_ret {
-	FLOW_DISSECT_RET_OUT_GOOD,
-	FLOW_DISSECT_RET_OUT_BAD,
-	FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
-};
-
 static enum flow_dissect_ret
 __skb_flow_dissect_mpls(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
@@ -341,7 +335,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
 	if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
 		return FLOW_DISSECT_RET_OUT_GOOD;
 
-	return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+	return FLOW_DISSECT_RET_PROTO_AGAIN;
 }
 
 static void
@@ -431,6 +425,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_icmp *key_icmp;
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
+	enum flow_dissect_ret fdret;
 	bool skip_vlan = false;
 	u8 ip_proto = 0;
 	bool ret;
@@ -482,14 +477,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	}
 
 proto_again:
+	fdret = FLOW_DISSECT_RET_CONTINUE;
+
 	switch (proto) {
 	case htons(ETH_P_IP): {
 		const struct iphdr *iph;
 		struct iphdr _iph;
-ip:
+
 		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
-		if (!iph || iph->ihl < 5)
-			goto out_bad;
+		if (!iph || iph->ihl < 5) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
+
 		nhoff += iph->ihl * 4;
 
 		ip_proto = iph->protocol;
@@ -509,19 +509,25 @@ ip:
 			key_control->flags |= FLOW_DIS_IS_FRAGMENT;
 
 			if (iph->frag_off & htons(IP_OFFSET)) {
-				goto out_good;
+				fdret = FLOW_DISSECT_RET_OUT_GOOD;
+				break;
 			} else {
 				key_control->flags |= FLOW_DIS_FIRST_FRAG;
-				if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
-					goto out_good;
+				if (!(flags &
+				      FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) {
+					fdret = FLOW_DISSECT_RET_OUT_GOOD;
+					break;
+				}
 			}
 		}
 
 		__skb_flow_dissect_ipv4(skb, flow_dissector,
 					target_container, data, iph);
 
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
-			goto out_good;
+		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+			break;
+		}
 
 		break;
 	}
@@ -529,10 +535,11 @@ ip:
 		const struct ipv6hdr *iph;
 		struct ipv6hdr _iph;
 
-ipv6:
 		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
-		if (!iph)
-			goto out_bad;
+		if (!iph) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		ip_proto = iph->nexthdr;
 		nhoff += sizeof(struct ipv6hdr);
@@ -561,15 +568,17 @@ ipv6:
 								     target_container);
 				key_tags->flow_label = ntohl(flow_label);
 			}
-			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
-				goto out_good;
+			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) {
+				fdret = FLOW_DISSECT_RET_OUT_GOOD;
+				break;
+			}
 		}
 
 		__skb_flow_dissect_ipv6(skb, flow_dissector,
 					target_container, data, iph);
 
 		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
-			goto out_good;
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
 
 		break;
 	}
@@ -585,12 +594,17 @@ ipv6:
 		if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
 			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
 						    data, hlen, &_vlan);
-			if (!vlan)
-				goto out_bad;
+			if (!vlan) {
+				fdret = FLOW_DISSECT_RET_OUT_BAD;
+				break;
+			}
+
 			proto = vlan->h_vlan_encapsulated_proto;
 			nhoff += sizeof(*vlan);
-			if (skip_vlan)
-				goto proto_again;
+			if (skip_vlan) {
+				fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+				break;
+			}
 		}
 
 		skip_vlan = true;
@@ -613,7 +627,8 @@ ipv6:
 			}
 		}
 
-		goto proto_again;
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
 	}
 	case htons(ETH_P_PPP_SES): {
 		struct {
@@ -621,18 +636,27 @@ ipv6:
 			__be16 proto;
 		} *hdr, _hdr;
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
-		if (!hdr)
-			goto out_bad;
+		if (!hdr) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
+
 		proto = hdr->proto;
 		nhoff += PPPOE_SES_HLEN;
 		switch (proto) {
 		case htons(PPP_IP):
-			goto ip;
+			proto = htons(ETH_P_IP);
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+			break;
 		case htons(PPP_IPV6):
-			goto ipv6;
+			proto = htons(ETH_P_IPV6);
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+			break;
 		default:
-			goto out_bad;
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
 		}
+		break;
 	}
 	case htons(ETH_P_TIPC): {
 		struct {
@@ -640,8 +664,10 @@ ipv6:
 			__be32 srcnode;
 		} *hdr, _hdr;
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
-		if (!hdr)
-			goto out_bad;
+		if (!hdr) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		if (dissector_uses_key(flow_dissector,
 				       FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
@@ -651,56 +677,62 @@ ipv6:
 			key_addrs->tipcaddrs.srcnode = hdr->srcnode;
 			key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
 		}
-		goto out_good;
+		fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		break;
 	}
 
 	case htons(ETH_P_MPLS_UC):
 	case htons(ETH_P_MPLS_MC):
-mpls:
-		switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+		fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
 						target_container, data,
-						nhoff, hlen)) {
-		case FLOW_DISSECT_RET_OUT_GOOD:
-			goto out_good;
-		case FLOW_DISSECT_RET_OUT_BAD:
-		default:
-			goto out_bad;
-		}
+						nhoff, hlen);
+		break;
 	case htons(ETH_P_FCOE):
-		if ((hlen - nhoff) < FCOE_HEADER_LEN)
-			goto out_bad;
+		if ((hlen - nhoff) < FCOE_HEADER_LEN) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		nhoff += FCOE_HEADER_LEN;
-		goto out_good;
+		fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		break;
 
 	case htons(ETH_P_ARP):
 	case htons(ETH_P_RARP):
-		switch (__skb_flow_dissect_arp(skb, flow_dissector,
+		fdret = __skb_flow_dissect_arp(skb, flow_dissector,
 					       target_container, data,
-					       nhoff, hlen)) {
-		case FLOW_DISSECT_RET_OUT_GOOD:
-			goto out_good;
-		case FLOW_DISSECT_RET_OUT_BAD:
-		default:
-			goto out_bad;
-		}
+					       nhoff, hlen);
+		break;
+
+	default:
+		fdret = FLOW_DISSECT_RET_OUT_BAD;
+		break;
+	}
+
+	/* Process result of proto processing */
+	switch (fdret) {
+	case FLOW_DISSECT_RET_OUT_GOOD:
+		goto out_good;
+	case FLOW_DISSECT_RET_PROTO_AGAIN:
+		goto proto_again;
+	case FLOW_DISSECT_RET_CONTINUE:
+	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+		break;
+	case FLOW_DISSECT_RET_OUT_BAD:
 	default:
 		goto out_bad;
 	}
 
 ip_proto_again:
+	fdret = FLOW_DISSECT_RET_CONTINUE;
+
 	switch (ip_proto) {
 	case IPPROTO_GRE:
-		switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+		fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
 					       target_container, data,
-					       &proto, &nhoff, &hlen, flags)) {
-		case FLOW_DISSECT_RET_OUT_GOOD:
-			goto out_good;
-		case FLOW_DISSECT_RET_OUT_BAD:
-			goto out_bad;
-		case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
-			goto proto_again;
-		}
+					       &proto, &nhoff, &hlen, flags);
+		break;
+
 	case NEXTHDR_HOP:
 	case NEXTHDR_ROUTING:
 	case NEXTHDR_DEST: {
@@ -711,13 +743,16 @@ ip_proto_again:
 
 		opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
 					      data, hlen, &_opthdr);
-		if (!opthdr)
-			goto out_bad;
+		if (!opthdr) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		ip_proto = opthdr[0];
 		nhoff += (opthdr[1] + 1) << 3;
 
-		goto ip_proto_again;
+		fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
+		break;
 	}
 	case NEXTHDR_FRAGMENT: {
 		struct frag_hdr _fh, *fh;
@@ -728,8 +763,10 @@ ip_proto_again:
 		fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
 					  data, hlen, &_fh);
 
-		if (!fh)
-			goto out_bad;
+		if (!fh) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
 
@@ -738,34 +775,50 @@ ip_proto_again:
 
 		if (!(fh->frag_off & htons(IP6_OFFSET))) {
 			key_control->flags |= FLOW_DIS_FIRST_FRAG;
-			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
-				goto ip_proto_again;
+			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
+				fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
+				break;
+			}
 		}
-		goto out_good;
+
+		fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		break;
 	}
 	case IPPROTO_IPIP:
 		proto = htons(ETH_P_IP);
 
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
-			goto out_good;
+		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+			break;
+		}
+
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
 
-		goto ip;
 	case IPPROTO_IPV6:
 		proto = htons(ETH_P_IPV6);
 
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
-			goto out_good;
+		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+			break;
+		}
+
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
+
 
-		goto ipv6;
 	case IPPROTO_MPLS:
 		proto = htons(ETH_P_MPLS_UC);
-		goto mpls;
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
+
 	case IPPROTO_TCP:
 		__skb_flow_dissect_tcp(skb, flow_dissector, target_container,
 				       data, nhoff, hlen);
 		break;
+
 	default:
 		break;
 	}
@@ -787,6 +840,20 @@ ip_proto_again:
 		key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
 	}
 
+	/* Process result of IP proto processing */
+	switch (fdret) {
+	case FLOW_DISSECT_RET_PROTO_AGAIN:
+		goto proto_again;
+	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+		goto ip_proto_again;
+	case FLOW_DISSECT_RET_OUT_GOOD:
+	case FLOW_DISSECT_RET_CONTINUE:
+		break;
+	case FLOW_DISSECT_RET_OUT_BAD:
+	default:
+		goto out_bad;
+	}
+
 out_good:
 	ret = true;
 
-- 
cgit v1.2.3


From 55199df6d2af55be414f40856efcb527811001bb Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 3 Sep 2017 20:27:00 -0700
Subject: net: dsa: Allow switch drivers to indicate number of TX queues

Let switch drivers indicate how many TX queues they support. Some
switches, such as Broadcom Starfighter 2 are designed with 8 egress
queues. Future changes will allow us to leverage the queue mapping and
direct the transmission towards a particular queue.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 3 +++
 net/dsa/slave.c   | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 398ca8d70ccd..dd44d6ce1097 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -243,6 +243,9 @@ struct dsa_switch {
 	/* devlink used to represent this switch device */
 	struct devlink		*devlink;
 
+	/* Number of switch port queues */
+	unsigned int		num_tx_queues;
+
 	/* Dynamically allocated ports, keep last */
 	size_t num_ports;
 	struct dsa_port ports[];
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 78e78a6e6833..2afa99506f8b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1259,8 +1259,12 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
 	cpu_dp = ds->dst->cpu_dp;
 	master = cpu_dp->netdev;
 
-	slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
-				 NET_NAME_UNKNOWN, ether_setup);
+	if (!ds->num_tx_queues)
+		ds->num_tx_queues = 1;
+
+	slave_dev = alloc_netdev_mqs(sizeof(struct dsa_slave_priv), name,
+				     NET_NAME_UNKNOWN, ether_setup,
+				     ds->num_tx_queues, 1);
 	if (slave_dev == NULL)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From e1bf1687740ce1a3598a1c5e452b852ff2190682 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Sep 2017 14:39:51 +0200
Subject: netfilter: nat: Revert "netfilter: nat: convert nat bysrc hash to
 rhashtable"

This reverts commit 870190a9ec9075205c0fa795a09fa931694a3ff1.

It was not a good idea. The custom hash table was a much better
fit for this purpose.

A fast lookup is not essential, in fact for most cases there is no lookup
at all because original tuple is not taken and can be used as-is.
What needs to be fast is insertion and deletion.

rhlist removal however requires a rhlist walk.
We can have thousands of entries in such a list if source port/addresses
are reused for multiple flows, if this happens removal requests are so
expensive that deletions of a few thousand flows can take several
seconds(!).

The advantages that we got from rhashtable are:
1) table auto-sizing
2) multiple locks

1) would be nice to have, but it is not essential as we have at
most one lookup per new flow, so even a million flows in the bysource
table are not a problem compared to current deletion cost.
2) is easy to add to custom hash table.

I tried to add hlist_node to rhlist to speed up rhltable_remove but this
isn't doable without changing semantics.  rhltable_remove_fast will
check that the to-be-deleted object is part of the table and that
requires a list walk that we want to avoid.

Furthermore, using hlist_node increases size of struct rhlist_head, which
in turn increases nf_conn size.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=196821
Reported-by: Ivan Babrou <ibobrik@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h |   3 +-
 include/net/netfilter/nf_nat.h       |   1 -
 net/netfilter/nf_nat_core.c          | 130 ++++++++++++++---------------------
 3 files changed, 54 insertions(+), 80 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index fdc9c64a1c94..8f3bd30511de 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -17,7 +17,6 @@
 #include <linux/bitops.h>
 #include <linux/compiler.h>
 #include <linux/atomic.h>
-#include <linux/rhashtable.h>
 
 #include <linux/netfilter/nf_conntrack_tcp.h>
 #include <linux/netfilter/nf_conntrack_dccp.h>
@@ -77,7 +76,7 @@ struct nf_conn {
 	possible_net_t ct_net;
 
 #if IS_ENABLED(CONFIG_NF_NAT)
-	struct rhlist_head nat_bysource;
+	struct hlist_node	nat_bysource;
 #endif
 	/* all members below initialized via memset */
 	u8 __nfct_init_offset[0];
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 05c82a1a4267..b71701302e61 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,6 +1,5 @@
 #ifndef _NF_NAT_H
 #define _NF_NAT_H
-#include <linux/rhashtable.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/nf_nat.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index dc3519cc7209..f090419f5f97 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,19 +30,17 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_nat.h>
 
+static DEFINE_SPINLOCK(nf_nat_lock);
+
 static DEFINE_MUTEX(nf_nat_proto_mutex);
 static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
 						__read_mostly;
 static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
 						__read_mostly;
 
-struct nf_nat_conn_key {
-	const struct net *net;
-	const struct nf_conntrack_tuple *tuple;
-	const struct nf_conntrack_zone *zone;
-};
-
-static struct rhltable nf_nat_bysource_table;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
 
 inline const struct nf_nat_l3proto *
 __nf_nat_l3proto_find(u8 family)
@@ -118,17 +116,19 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 EXPORT_SYMBOL(nf_xfrm_me_harder);
 #endif /* CONFIG_XFRM */
 
-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static unsigned int
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
 {
-	const struct nf_conntrack_tuple *t;
-	const struct nf_conn *ct = data;
+	unsigned int hash;
+
+	get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
 
-	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 	/* Original src, to ensure we map it consistently if poss. */
+	hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+		      tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
 
-	seed ^= net_hash_mix(nf_ct_net(ct));
-	return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
-		      t->dst.protonum ^ seed);
+	return reciprocal_scale(hash, nf_nat_htable_size);
 }
 
 /* Is this tuple already taken? (not by us) */
@@ -184,28 +184,6 @@ same_src(const struct nf_conn *ct,
 		t->src.u.all == tuple->src.u.all);
 }
 
-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
-			       const void *obj)
-{
-	const struct nf_nat_conn_key *key = arg->key;
-	const struct nf_conn *ct = obj;
-
-	if (!same_src(ct, key->tuple) ||
-	    !net_eq(nf_ct_net(ct), key->net) ||
-	    !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
-		return 1;
-
-	return 0;
-}
-
-static struct rhashtable_params nf_nat_bysource_params = {
-	.head_offset = offsetof(struct nf_conn, nat_bysource),
-	.obj_hashfn = nf_nat_bysource_hash,
-	.obj_cmpfn = nf_nat_bysource_cmp,
-	.nelem_hint = 256,
-	.min_size = 1024,
-};
-
 /* Only called for SRC manip */
 static int
 find_appropriate_src(struct net *net,
@@ -216,26 +194,22 @@ find_appropriate_src(struct net *net,
 		     struct nf_conntrack_tuple *result,
 		     const struct nf_nat_range *range)
 {
+	unsigned int h = hash_by_src(net, tuple);
 	const struct nf_conn *ct;
-	struct nf_nat_conn_key key = {
-		.net = net,
-		.tuple = tuple,
-		.zone = zone
-	};
-	struct rhlist_head *hl, *h;
-
-	hl = rhltable_lookup(&nf_nat_bysource_table, &key,
-			     nf_nat_bysource_params);
 
-	rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
-		nf_ct_invert_tuplepr(result,
-				     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-		result->dst = tuple->dst;
-
-		if (in_range(l3proto, l4proto, result, range))
-			return 1;
+	hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+		if (same_src(ct, tuple) &&
+		    net_eq(net, nf_ct_net(ct)) &&
+		    nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+			/* Copy source part from reply tuple. */
+			nf_ct_invert_tuplepr(result,
+				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+			result->dst = tuple->dst;
+
+			if (in_range(l3proto, l4proto, result, range))
+				return 1;
+		}
 	}
-
 	return 0;
 }
 
@@ -408,6 +382,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 
 	/* Can't setup nat info for confirmed ct. */
@@ -449,19 +424,14 @@ nf_nat_setup_info(struct nf_conn *ct,
 	}
 
 	if (maniptype == NF_NAT_MANIP_SRC) {
-		struct nf_nat_conn_key key = {
-			.net = nf_ct_net(ct),
-			.tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-			.zone = nf_ct_zone(ct),
-		};
-		int err;
-
-		err = rhltable_insert_key(&nf_nat_bysource_table,
-					  &key,
-					  &ct->nat_bysource,
-					  nf_nat_bysource_params);
-		if (err)
-			return NF_DROP;
+		unsigned int srchash;
+
+		srchash = hash_by_src(net,
+				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+		spin_lock_bh(&nf_nat_lock);
+		hlist_add_head_rcu(&ct->nat_bysource,
+				   &nf_nat_bysource[srchash]);
+		spin_unlock_bh(&nf_nat_lock);
 	}
 
 	/* It's done. */
@@ -570,8 +540,9 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
 	 * will delete entry from already-freed table.
 	 */
 	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
-	rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
-			nf_nat_bysource_params);
+	spin_lock_bh(&nf_nat_lock);
+	hlist_del_rcu(&ct->nat_bysource);
+	spin_unlock_bh(&nf_nat_lock);
 
 	/* don't delete conntrack.  Although that would make things a lot
 	 * simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -699,9 +670,11 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
 /* No one using conntrack by the time this called. */
 static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
-	if (ct->status & IPS_SRC_NAT_DONE)
-		rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
-				nf_nat_bysource_params);
+	if (ct->status & IPS_SRC_NAT_DONE) {
+		spin_lock_bh(&nf_nat_lock);
+		hlist_del_rcu(&ct->nat_bysource);
+		spin_unlock_bh(&nf_nat_lock);
+	}
 }
 
 static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -825,13 +798,16 @@ static int __init nf_nat_init(void)
 {
 	int ret;
 
-	ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
-	if (ret)
-		return ret;
+	/* Leave them the same for the moment. */
+	nf_nat_htable_size = nf_conntrack_htable_size;
+
+	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+	if (!nf_nat_bysource)
+		return -ENOMEM;
 
 	ret = nf_ct_extend_register(&nat_extend);
 	if (ret < 0) {
-		rhltable_destroy(&nf_nat_bysource_table);
+		nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
 		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
 		return ret;
 	}
@@ -865,8 +841,8 @@ static void __exit nf_nat_cleanup(void)
 
 	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		kfree(nf_nat_l4protos[i]);
-
-	rhltable_destroy(&nf_nat_bysource_table);
+	synchronize_net();
+	nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
 }
 
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From d7fb60b9cafb982cb2e46a267646a8dfd4f2e5da Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 11 Sep 2017 16:33:30 -0700
Subject: net_sched: get rid of tcfa_rcu

gen estimator has been rewritten in commit 1c0d32fde5bd
("net_sched: gen_estimator: complete rewrite of rate estimators"),
the caller is no longer needed to wait for a grace period.
So this patch gets rid of it.

This also completely closes a race condition between action free
path and filter chain add/remove path for the following patch.
Because otherwise the nested RCU callback can't be caught by
rcu_barrier().

Please see also the comments in code.

Cc: Jiri Pirko <jiri@mellanox.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h |  2 --
 net/sched/act_api.c   | 17 ++++++++---------
 2 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 8f3d5d8b5ae0..b944e0eb93be 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -34,7 +34,6 @@ struct tc_action {
 	struct gnet_stats_queue		tcfa_qstats;
 	struct net_rate_estimator __rcu *tcfa_rate_est;
 	spinlock_t			tcfa_lock;
-	struct rcu_head			tcfa_rcu;
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
 	struct gnet_stats_queue __percpu *cpu_qstats;
 	struct tc_cookie	*act_cookie;
@@ -50,7 +49,6 @@ struct tc_action {
 #define tcf_qstats	common.tcfa_qstats
 #define tcf_rate_est	common.tcfa_rate_est
 #define tcf_lock	common.tcfa_lock
-#define tcf_rcu		common.tcfa_rcu
 
 /* Update lastuse only if needed, to avoid dirtying a cache line.
  * We use a temp variable to avoid fetching jiffies twice.
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a306974e2fb4..fcd7dc7b807a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -53,10 +53,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
 	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
 
-static void free_tcf(struct rcu_head *head)
+/* XXX: For standalone actions, we don't need a RCU grace period either, because
+ * actions are always connected to filters and filters are already destroyed in
+ * RCU callbacks, so after a RCU grace period actions are already disconnected
+ * from filters. Readers later can not find us.
+ */
+static void free_tcf(struct tc_action *p)
 {
-	struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu);
-
 	free_percpu(p->cpu_bstats);
 	free_percpu(p->cpu_qstats);
 
@@ -76,11 +79,7 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
 	idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
 	spin_unlock_bh(&idrinfo->lock);
 	gen_kill_estimator(&p->tcfa_rate_est);
-	/*
-	 * gen_estimator est_timer() might access p->tcfa_lock
-	 * or bstats, wait a RCU grace period before freeing p
-	 */
-	call_rcu(&p->tcfa_rcu, free_tcf);
+	free_tcf(p);
 }
 
 int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
@@ -259,7 +258,7 @@ void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
 {
 	if (est)
 		gen_kill_estimator(&a->tcfa_rate_est);
-	call_rcu(&a->tcfa_rcu, free_tcf);
+	free_tcf(a);
 }
 EXPORT_SYMBOL(tcf_idr_cleanup);
 
-- 
cgit v1.2.3


From fa5f7b51fc3080c2b195fa87c7eca7c05e56f673 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 14 Sep 2017 02:00:54 +0300
Subject: sctp: potential read out of bounds in sctp_ulpevent_type_enabled()

This code causes a static checker warning because Smatch doesn't trust
anything that comes from skb->data.  I've reviewed this code and I do
think skb->data can be controlled by the user here.

The sctp_event_subscribe struct has 13 __u8 fields and we want to see
if ours is non-zero.  sn_type can be any value in the 0-USHRT_MAX range.
We're subtracting SCTP_SN_TYPE_BASE which is 1 << 15 so we could read
either before the start of the struct or after the end.

This is a very old bug and it's surprising that it would go undetected
for so long but my theory is that it just doesn't have a big impact so
it would be hard to notice.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/ulpevent.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 1060494ac230..b8c86ec1a8f5 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -153,8 +153,12 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event);
 static inline int sctp_ulpevent_type_enabled(__u16 sn_type,
 					     struct sctp_event_subscribe *mask)
 {
+	int offset = sn_type - SCTP_SN_TYPE_BASE;
 	char *amask = (char *) mask;
-	return amask[sn_type - SCTP_SN_TYPE_BASE];
+
+	if (offset >= sizeof(struct sctp_event_subscribe))
+		return 0;
+	return amask[offset];
 }
 
 /* Given an event subscription, is this event enabled? */
-- 
cgit v1.2.3


From d25adbeb0cdb860fb39e09cdd025e9cfc954c5ab Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 15 Sep 2017 11:02:21 +0800
Subject: sctp: fix an use-after-free issue in sctp_sock_dump

Commit 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the
dump") tried to fix an use-after-free issue by checking !sctp_sk(sk)->ep
with holding sock and sock lock.

But Paolo noticed that endpoint could be destroyed in sctp_rcv without
sock lock protection. It means the use-after-free issue still could be
triggered when sctp_rcv put and destroy ep after sctp_sock_dump checks
!ep, although it's pretty hard to reproduce.

I could reproduce it by mdelay in sctp_rcv while msleep in sctp_close
and sctp_sock_dump long time.

This patch is to add another param cb_done to sctp_for_each_transport
and dump ep->assocs with holding tsp after jumping out of transport's
traversal in it to avoid this issue.

It can also improve sctp diag dump to make it run faster, as no need
to save sk into cb->args[5] and keep calling sctp_for_each_transport
any more.

This patch is also to use int * instead of int for the pos argument
in sctp_for_each_transport, which could make postion increment only
in sctp_for_each_transport and no need to keep changing cb->args[2]
in sctp_sock_filter and sctp_sock_dump any more.

Fixes: 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump")
Reported-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h |  3 ++-
 net/sctp/sctp_diag.c    | 32 +++++++++-----------------------
 net/sctp/socket.c       | 40 +++++++++++++++++++++++++---------------
 3 files changed, 36 insertions(+), 39 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 06b4f515e157..d7d8cba01469 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -127,7 +127,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
 				  const union sctp_addr *laddr,
 				  const union sctp_addr *paddr, void *p);
 int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
-			    struct net *net, int pos, void *p);
+			    int (*cb_done)(struct sctp_transport *, void *),
+			    struct net *net, int *pos, void *p);
 int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
 int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
 		       struct sctp_info *info);
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index e99518e79b52..7008a992749b 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -279,9 +279,11 @@ out:
 	return err;
 }
 
-static int sctp_sock_dump(struct sock *sk, void *p)
+static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
 {
+	struct sctp_endpoint *ep = tsp->asoc->ep;
 	struct sctp_comm_param *commp = p;
+	struct sock *sk = ep->base.sk;
 	struct sk_buff *skb = commp->skb;
 	struct netlink_callback *cb = commp->cb;
 	const struct inet_diag_req_v2 *r = commp->r;
@@ -289,9 +291,7 @@ static int sctp_sock_dump(struct sock *sk, void *p)
 	int err = 0;
 
 	lock_sock(sk);
-	if (!sctp_sk(sk)->ep)
-		goto release;
-	list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) {
+	list_for_each_entry(assoc, &ep->asocs, asocs) {
 		if (cb->args[4] < cb->args[1])
 			goto next;
 
@@ -327,40 +327,30 @@ next:
 		cb->args[4]++;
 	}
 	cb->args[1] = 0;
-	cb->args[2]++;
 	cb->args[3] = 0;
 	cb->args[4] = 0;
 release:
 	release_sock(sk);
-	sock_put(sk);
 	return err;
 }
 
-static int sctp_get_sock(struct sctp_transport *tsp, void *p)
+static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
 {
 	struct sctp_endpoint *ep = tsp->asoc->ep;
 	struct sctp_comm_param *commp = p;
 	struct sock *sk = ep->base.sk;
-	struct netlink_callback *cb = commp->cb;
 	const struct inet_diag_req_v2 *r = commp->r;
 	struct sctp_association *assoc =
 		list_entry(ep->asocs.next, struct sctp_association, asocs);
 
 	/* find the ep only once through the transports by this condition */
 	if (tsp->asoc != assoc)
-		goto out;
+		return 0;
 
 	if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
-		goto out;
-
-	sock_hold(sk);
-	cb->args[5] = (long)sk;
+		return 0;
 
 	return 1;
-
-out:
-	cb->args[2]++;
-	return 0;
 }
 
 static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
@@ -503,12 +493,8 @@ skip:
 	if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
 		goto done;
 
-next:
-	cb->args[5] = 0;
-	sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp);
-
-	if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp))
-		goto next;
+	sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
+				net, (int *)&cb->args[2], &commp);
 
 done:
 	cb->args[1] = cb->args[4];
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1b00a1e09b93..d4730ada7f32 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4658,29 +4658,39 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
 EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
 
 int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
-			    struct net *net, int pos, void *p) {
+			    int (*cb_done)(struct sctp_transport *, void *),
+			    struct net *net, int *pos, void *p) {
 	struct rhashtable_iter hti;
-	void *obj;
-	int err;
-
-	err = sctp_transport_walk_start(&hti);
-	if (err)
-		return err;
+	struct sctp_transport *tsp;
+	int ret;
 
-	obj = sctp_transport_get_idx(net, &hti, pos + 1);
-	for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) {
-		struct sctp_transport *transport = obj;
+again:
+	ret = sctp_transport_walk_start(&hti);
+	if (ret)
+		return ret;
 
-		if (!sctp_transport_hold(transport))
+	tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
+	for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
+		if (!sctp_transport_hold(tsp))
 			continue;
-		err = cb(transport, p);
-		sctp_transport_put(transport);
-		if (err)
+		ret = cb(tsp, p);
+		if (ret)
 			break;
+		(*pos)++;
+		sctp_transport_put(tsp);
 	}
 	sctp_transport_walk_stop(&hti);
 
-	return err;
+	if (ret) {
+		if (cb_done && !cb_done(tsp, p)) {
+			(*pos)++;
+			sctp_transport_put(tsp);
+			goto again;
+		}
+		sctp_transport_put(tsp);
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(sctp_for_each_transport);
 
-- 
cgit v1.2.3


From 4c7124413aa759b8ea0b90cd39177e525396e662 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 18 Sep 2017 11:05:16 -0700
Subject: tcp: remove two unused functions

remove tcp_may_send_now and tcp_snd_test that are no longer used

Fixes: 840a3cbe8969 ("tcp: remove forward retransmit feature")
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  1 -
 net/ipv4/tcp_output.c | 34 ----------------------------------
 2 files changed, 35 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b510f284427a..3bc910a9bfc6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -544,7 +544,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 		     int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 			       int nonagle);
-bool tcp_may_send_now(struct sock *sk);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
 void tcp_retransmit_timer(struct sock *sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1c839c99114c..517d737059d1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1806,40 +1806,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
 	return !after(end_seq, tcp_wnd_end(tp));
 }
 
-/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
- * should be put on the wire right now.  If so, it returns the number of
- * packets allowed by the congestion window.
- */
-static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
-				 unsigned int cur_mss, int nonagle)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int cwnd_quota;
-
-	tcp_init_tso_segs(skb, cur_mss);
-
-	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
-		return 0;
-
-	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
-		cwnd_quota = 0;
-
-	return cwnd_quota;
-}
-
-/* Test if sending is allowed right now. */
-bool tcp_may_send_now(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_send_head(sk);
-
-	return skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk),
-			     (tcp_skb_is_last(sk, skb) ?
-			      tp->nonagle : TCP_NAGLE_PUSH));
-}
-
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
  * which is put after SKB on the list.  It is very much like
  * tcp_fragment() except that it may make several kinds of assumptions
-- 
cgit v1.2.3


From 222d7dbd258dad4cd5241c43ef818141fad5a87a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 21 Sep 2017 09:15:46 -0700
Subject: net: prevent dst uses after free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In linux-4.13, Wei worked hard to convert dst to a traditional
refcounted model, removing GC.

We now want to make sure a dst refcount can not transition from 0 back
to 1.

The problem here is that input path attached a not refcounted dst to an
skb. Then later, because packet is forwarded and hits skb_dst_force()
before exiting RCU section, we might try to take a refcount on one dst
that is about to be freed, if another cpu saw 1 -> 0 transition in
dst_release() and queued the dst for freeing after one RCU grace period.

Lets unify skb_dst_force() and skb_dst_force_safe(), since we should
always perform the complete check against dst refcount, and not assume
it is not zero.

Bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=197005

[  989.919496]  skb_dst_force+0x32/0x34
[  989.919498]  __dev_queue_xmit+0x1ad/0x482
[  989.919501]  ? eth_header+0x28/0xc6
[  989.919502]  dev_queue_xmit+0xb/0xd
[  989.919504]  neigh_connected_output+0x9b/0xb4
[  989.919507]  ip_finish_output2+0x234/0x294
[  989.919509]  ? ipt_do_table+0x369/0x388
[  989.919510]  ip_finish_output+0x12c/0x13f
[  989.919512]  ip_output+0x53/0x87
[  989.919513]  ip_forward_finish+0x53/0x5a
[  989.919515]  ip_forward+0x2cb/0x3e6
[  989.919516]  ? pskb_trim_rcsum.part.9+0x4b/0x4b
[  989.919518]  ip_rcv_finish+0x2e2/0x321
[  989.919519]  ip_rcv+0x26f/0x2eb
[  989.919522]  ? vlan_do_receive+0x4f/0x289
[  989.919523]  __netif_receive_skb_core+0x467/0x50b
[  989.919526]  ? tcp_gro_receive+0x239/0x239
[  989.919529]  ? inet_gro_receive+0x226/0x238
[  989.919530]  __netif_receive_skb+0x4d/0x5f
[  989.919532]  netif_receive_skb_internal+0x5c/0xaf
[  989.919533]  napi_gro_receive+0x45/0x81
[  989.919536]  ixgbe_poll+0xc8a/0xf09
[  989.919539]  ? kmem_cache_free_bulk+0x1b6/0x1f7
[  989.919540]  net_rx_action+0xf4/0x266
[  989.919543]  __do_softirq+0xa8/0x19d
[  989.919545]  irq_exit+0x5d/0x6b
[  989.919546]  do_IRQ+0x9c/0xb5
[  989.919548]  common_interrupt+0x93/0x93
[  989.919548]  </IRQ>

Similarly dst_clone() can use dst_hold() helper to have additional
debugging, as a follow up to commit 44ebe79149ff ("net: add debug
atomic_inc_not_zero() in dst_hold()")

In net-next we will convert dst atomic_t to refcount_t for peace of
mind.

Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Wei Wang <weiwan@google.com>
Reported-by: Paweł Staszewski <pstaszewski@itcare.pl>
Bisected-by: Paweł Staszewski <pstaszewski@itcare.pl>
Acked-by: Wei Wang <weiwan@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h   | 22 ++++------------------
 include/net/route.h |  2 +-
 include/net/sock.h  |  2 +-
 3 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 93568bd0a352..06a6765da074 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
 static inline struct dst_entry *dst_clone(struct dst_entry *dst)
 {
 	if (dst)
-		atomic_inc(&dst->__refcnt);
+		dst_hold(dst);
 	return dst;
 }
 
@@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
 	__skb_dst_copy(nskb, oskb->_skb_refdst);
 }
 
-/**
- * skb_dst_force - makes sure skb dst is refcounted
- * @skb: buffer
- *
- * If dst is not yet refcounted, let's do it
- */
-static inline void skb_dst_force(struct sk_buff *skb)
-{
-	if (skb_dst_is_noref(skb)) {
-		WARN_ON(!rcu_read_lock_held());
-		skb->_skb_refdst &= ~SKB_DST_NOREF;
-		dst_clone(skb_dst(skb));
-	}
-}
-
 /**
  * dst_hold_safe - Take a reference on a dst if possible
  * @dst: pointer to dst entry
@@ -339,16 +324,17 @@ static inline bool dst_hold_safe(struct dst_entry *dst)
 }
 
 /**
- * skb_dst_force_safe - makes sure skb dst is refcounted
+ * skb_dst_force - makes sure skb dst is refcounted
  * @skb: buffer
  *
  * If dst is not yet refcounted and not destroyed, grab a ref on it.
  */
-static inline void skb_dst_force_safe(struct sk_buff *skb)
+static inline void skb_dst_force(struct sk_buff *skb)
 {
 	if (skb_dst_is_noref(skb)) {
 		struct dst_entry *dst = skb_dst(skb);
 
+		WARN_ON(!rcu_read_lock_held());
 		if (!dst_hold_safe(dst))
 			dst = NULL;
 
diff --git a/include/net/route.h b/include/net/route.h
index 1b09a9368c68..57dfc6850d37 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -190,7 +190,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 	rcu_read_lock();
 	err = ip_route_input_noref(skb, dst, src, tos, devin);
 	if (!err) {
-		skb_dst_force_safe(skb);
+		skb_dst_force(skb);
 		if (!skb_dst(skb))
 			err = -EINVAL;
 	}
diff --git a/include/net/sock.h b/include/net/sock.h
index 03a362568357..a6b9a8d1a6df 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -856,7 +856,7 @@ void sk_stream_write_space(struct sock *sk);
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	/* dont let skb dst not refcounted, we are going to leave rcu lock */
-	skb_dst_force_safe(skb);
+	skb_dst_force(skb);
 
 	if (!sk->sk_backlog.tail)
 		sk->sk_backlog.head = skb;
-- 
cgit v1.2.3


From b4391db42308c9940944b5d7be5ca4b78fb88dd0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 22 Sep 2017 23:29:19 +0200
Subject: netlink: fix nla_put_{u8,u16,u32} for KASAN

When CONFIG_KASAN is enabled, the "--param asan-stack=1" causes rather large
stack frames in some functions. This goes unnoticed normally because
CONFIG_FRAME_WARN is disabled with CONFIG_KASAN by default as of commit
3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with
KASAN=y").

The kernelci.org build bot however has the warning enabled and that led
me to investigate it a little further, as every build produces these warnings:

net/wireless/nl80211.c:4389:1: warning: the frame size of 2240 bytes is larger than 2048 bytes [-Wframe-larger-than=]
net/wireless/nl80211.c:1895:1: warning: the frame size of 3776 bytes is larger than 2048 bytes [-Wframe-larger-than=]
net/wireless/nl80211.c:1410:1: warning: the frame size of 2208 bytes is larger than 2048 bytes [-Wframe-larger-than=]
net/bridge/br_netlink.c:1282:1: warning: the frame size of 2544 bytes is larger than 2048 bytes [-Wframe-larger-than=]

Most of this problem is now solved in gcc-8, which can consolidate
the stack slots for the inline function arguments. On older compilers
we can add a workaround by declaring a local variable in each function
to pass the inline function argument.

Cc: stable@vger.kernel.org
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 73 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 55 insertions(+), 18 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index e51cf5f81597..14c289393071 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -773,7 +773,10 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
  */
 static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value)
 {
-	return nla_put(skb, attrtype, sizeof(u8), &value);
+	/* temporary variables to work around GCC PR81715 with asan-stack=1 */
+	u8 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(u8), &tmp);
 }
 
 /**
@@ -784,7 +787,9 @@ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value)
  */
 static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value)
 {
-	return nla_put(skb, attrtype, sizeof(u16), &value);
+	u16 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(u16), &tmp);
 }
 
 /**
@@ -795,7 +800,9 @@ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value)
  */
 static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value)
 {
-	return nla_put(skb, attrtype, sizeof(__be16), &value);
+	__be16 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(__be16), &tmp);
 }
 
 /**
@@ -806,7 +813,9 @@ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value)
  */
 static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value)
 {
-	return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, value);
+	__be16 tmp = value;
+
+	return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, tmp);
 }
 
 /**
@@ -817,7 +826,9 @@ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value)
  */
 static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value)
 {
-	return nla_put(skb, attrtype, sizeof(__le16), &value);
+	__le16 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(__le16), &tmp);
 }
 
 /**
@@ -828,7 +839,9 @@ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value)
  */
 static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value)
 {
-	return nla_put(skb, attrtype, sizeof(u32), &value);
+	u32 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(u32), &tmp);
 }
 
 /**
@@ -839,7 +852,9 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value)
  */
 static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value)
 {
-	return nla_put(skb, attrtype, sizeof(__be32), &value);
+	__be32 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(__be32), &tmp);
 }
 
 /**
@@ -850,7 +865,9 @@ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value)
  */
 static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value)
 {
-	return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, value);
+	__be32 tmp = value;
+
+	return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, tmp);
 }
 
 /**
@@ -861,7 +878,9 @@ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value)
  */
 static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value)
 {
-	return nla_put(skb, attrtype, sizeof(__le32), &value);
+	__le32 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(__le32), &tmp);
 }
 
 /**
@@ -874,7 +893,9 @@ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value)
 static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype,
 				    u64 value, int padattr)
 {
-	return nla_put_64bit(skb, attrtype, sizeof(u64), &value, padattr);
+	u64 tmp = value;
+
+	return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr);
 }
 
 /**
@@ -887,7 +908,9 @@ static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype,
 static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value,
 			       int padattr)
 {
-	return nla_put_64bit(skb, attrtype, sizeof(__be64), &value, padattr);
+	__be64 tmp = value;
+
+	return nla_put_64bit(skb, attrtype, sizeof(__be64), &tmp, padattr);
 }
 
 /**
@@ -900,7 +923,9 @@ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value,
 static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value,
 				int padattr)
 {
-	return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value,
+	__be64 tmp = value;
+
+	return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, tmp,
 			    padattr);
 }
 
@@ -914,7 +939,9 @@ static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value,
 static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value,
 			       int padattr)
 {
-	return nla_put_64bit(skb, attrtype, sizeof(__le64), &value, padattr);
+	__le64 tmp = value;
+
+	return nla_put_64bit(skb, attrtype, sizeof(__le64), &tmp, padattr);
 }
 
 /**
@@ -925,7 +952,9 @@ static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value,
  */
 static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value)
 {
-	return nla_put(skb, attrtype, sizeof(s8), &value);
+	s8 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(s8), &tmp);
 }
 
 /**
@@ -936,7 +965,9 @@ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value)
  */
 static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value)
 {
-	return nla_put(skb, attrtype, sizeof(s16), &value);
+	s16 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(s16), &tmp);
 }
 
 /**
@@ -947,7 +978,9 @@ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value)
  */
 static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value)
 {
-	return nla_put(skb, attrtype, sizeof(s32), &value);
+	s32 tmp = value;
+
+	return nla_put(skb, attrtype, sizeof(s32), &tmp);
 }
 
 /**
@@ -960,7 +993,9 @@ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value)
 static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value,
 			      int padattr)
 {
-	return nla_put_64bit(skb, attrtype, sizeof(s64), &value, padattr);
+	s64 tmp = value;
+
+	return nla_put_64bit(skb, attrtype, sizeof(s64), &tmp, padattr);
 }
 
 /**
@@ -1010,7 +1045,9 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype,
 static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype,
 				  __be32 addr)
 {
-	return nla_put_be32(skb, attrtype, addr);
+	__be32 tmp = addr;
+
+	return nla_put_be32(skb, attrtype, tmp);
 }
 
 /**
-- 
cgit v1.2.3


From 7487449c86c65202b3b725c4524cb48dd65e4e6f Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 28 Sep 2017 15:51:36 +0200
Subject: IPv4: early demux can return an error code

Currently no error is emitted, but this infrastructure will
used by the next patch to allow source address validation
for mcast sockets.
Since early demux can do a route lookup and an ipv4 route
lookup can return an error code this is consistent with the
current ipv4 route infrastructure.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/protocol.h |  4 ++--
 include/net/tcp.h      |  2 +-
 include/net/udp.h      |  2 +-
 net/ipv4/ip_input.c    | 25 +++++++++++++++----------
 net/ipv4/tcp_ipv4.c    |  9 +++++----
 net/ipv4/udp.c         | 11 ++++++-----
 6 files changed, 30 insertions(+), 23 deletions(-)

(limited to 'include/net')

diff --git a/include/net/protocol.h b/include/net/protocol.h
index 65ba335b0e7e..4fc75f7ae23b 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -39,8 +39,8 @@
 
 /* This is used to register protocols. */
 struct net_protocol {
-	void			(*early_demux)(struct sk_buff *skb);
-	void                    (*early_demux_handler)(struct sk_buff *skb);
+	int			(*early_demux)(struct sk_buff *skb);
+	int			(*early_demux_handler)(struct sk_buff *skb);
 	int			(*handler)(struct sk_buff *skb);
 	void			(*err_handler)(struct sk_buff *skb, u32 info);
 	unsigned int		no_policy:1,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3bc910a9bfc6..89974c5286d8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -345,7 +345,7 @@ void tcp_v4_err(struct sk_buff *skb, u32);
 
 void tcp_shutdown(struct sock *sk, int how);
 
-void tcp_v4_early_demux(struct sk_buff *skb);
+int tcp_v4_early_demux(struct sk_buff *skb);
 int tcp_v4_rcv(struct sk_buff *skb);
 
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
diff --git a/include/net/udp.h b/include/net/udp.h
index 12dfbfe2e2d7..6c759c8594e2 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -259,7 +259,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
 	return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
 }
 
-void udp_v4_early_demux(struct sk_buff *skb);
+int udp_v4_early_demux(struct sk_buff *skb);
 bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
 int udp_get_port(struct sock *sk, unsigned short snum,
 		 int (*saddr_cmp)(const struct sock *,
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fa2dc8f692c6..57fc13c6ab2b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -311,9 +311,10 @@ drop:
 static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
-	struct rtable *rt;
+	int (*edemux)(struct sk_buff *skb);
 	struct net_device *dev = skb->dev;
-	void (*edemux)(struct sk_buff *skb);
+	struct rtable *rt;
+	int err;
 
 	/* if ingress device is enslaved to an L3 master device pass the
 	 * skb to its handler for processing
@@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 		ipprot = rcu_dereference(inet_protos[protocol]);
 		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
-			edemux(skb);
+			err = edemux(skb);
+			if (unlikely(err))
+				goto drop_error;
 			/* must reload iph, skb->head might have changed */
 			iph = ip_hdr(skb);
 		}
@@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 	 *	how the packet travels inside Linux networking.
 	 */
 	if (!skb_valid_dst(skb)) {
-		int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
-					       iph->tos, dev);
-		if (unlikely(err)) {
-			if (err == -EXDEV)
-				__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
-			goto drop;
-		}
+		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					   iph->tos, dev);
+		if (unlikely(err))
+			goto drop_error;
 	}
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
@@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 drop:
 	kfree_skb(skb);
 	return NET_RX_DROP;
+
+drop_error:
+	if (err == -EXDEV)
+		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+	goto drop;
 }
 
 /*
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d9416b5162bc..85164d4d3e53 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1503,23 +1503,23 @@ csum_err:
 }
 EXPORT_SYMBOL(tcp_v4_do_rcv);
 
-void tcp_v4_early_demux(struct sk_buff *skb)
+int tcp_v4_early_demux(struct sk_buff *skb)
 {
 	const struct iphdr *iph;
 	const struct tcphdr *th;
 	struct sock *sk;
 
 	if (skb->pkt_type != PACKET_HOST)
-		return;
+		return 0;
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
-		return;
+		return 0;
 
 	iph = ip_hdr(skb);
 	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr) / 4)
-		return;
+		return 0;
 
 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
 				       iph->saddr, th->source,
@@ -1538,6 +1538,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
 				skb_dst_set_noref(skb, dst);
 		}
 	}
+	return 0;
 }
 
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ef29df8648e4..9b30f821fe96 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2221,7 +2221,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
 	return NULL;
 }
 
-void udp_v4_early_demux(struct sk_buff *skb)
+int udp_v4_early_demux(struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
 	const struct iphdr *iph;
@@ -2234,7 +2234,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 
 	/* validate the packet */
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
-		return;
+		return 0;
 
 	iph = ip_hdr(skb);
 	uh = udp_hdr(skb);
@@ -2244,14 +2244,14 @@ void udp_v4_early_demux(struct sk_buff *skb)
 		struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
 
 		if (!in_dev)
-			return;
+			return 0;
 
 		/* we are supposed to accept bcast packets */
 		if (skb->pkt_type == PACKET_MULTICAST) {
 			ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
 					       iph->protocol);
 			if (!ours)
-				return;
+				return 0;
 		}
 
 		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
@@ -2263,7 +2263,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 	}
 
 	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
-		return;
+		return 0;
 
 	skb->sk = sk;
 	skb->destructor = sock_efree;
@@ -2278,6 +2278,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 		 */
 		skb_dst_set_noref(skb, dst);
 	}
+	return 0;
 }
 
 int udp_rcv(struct sk_buff *skb)
-- 
cgit v1.2.3


From bc044e8db7962e727a75b591b9851ff2ac5cf846 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 28 Sep 2017 15:51:37 +0200
Subject: udp: perform source validation for mcast early demux

The UDP early demux can leverate the rx dst cache even for
multicast unconnected sockets.

In such scenario the ipv4 source address is validated only on
the first packet in the given flow. After that, when we fetch
the dst entry  from the socket rx cache, we stop enforcing
the rp_filter and we even start accepting any kind of martian
addresses.

Disabling the dst cache for unconnected multicast socket will
cause large performace regression, nearly reducing by half the
max ingress tput.

Instead we factor out a route helper to completely validate an
skb source address for multicast packets and we call it from
the UDP early demux for mcast packets landing on unconnected
sockets, after successful fetching the related cached dst entry.

This still gives a measurable, but limited performance
regression:

		rp_filter = 0		rp_filter = 1
edmux disabled:	1182 Kpps		1127 Kpps
edmux before:	2238 Kpps		2238 Kpps
edmux after:	2037 Kpps		2019 Kpps

The above figures are on top of current net tree.
Applying the net-next commit 6e617de84e87 ("net: avoid a full
fib lookup when rp_filter is disabled.") the delta with
rp_filter == 0 will decrease even more.

Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h |  4 +++-
 net/ipv4/route.c    | 46 ++++++++++++++++++++++++++--------------------
 net/ipv4/udp.c      | 13 ++++++++++++-
 3 files changed, 41 insertions(+), 22 deletions(-)

(limited to 'include/net')

diff --git a/include/net/route.h b/include/net/route.h
index 57dfc6850d37..d538e6db1afe 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -175,7 +175,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
 	fl4->fl4_gre_key = gre_key;
 	return ip_route_output_key(net, fl4);
 }
-
+int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+			  u8 tos, struct net_device *dev,
+			  struct in_device *in_dev, u32 *itag);
 int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
 			 u8 tos, struct net_device *devin);
 int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 94d4cd2d5ea4..ac6fde5d45f1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 EXPORT_SYMBOL(rt_dst_alloc);
 
 /* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-				u8 tos, struct net_device *dev, int our)
+int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+			  u8 tos, struct net_device *dev,
+			  struct in_device *in_dev, u32 *itag)
 {
-	struct rtable *rth;
-	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	unsigned int flags = RTCF_MULTICAST;
-	u32 itag = 0;
 	int err;
 
 	/* Primary sanity checks. */
-
 	if (!in_dev)
 		return -EINVAL;
 
 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
 	    skb->protocol != htons(ETH_P_IP))
-		goto e_inval;
+		return -EINVAL;
 
 	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
-		goto e_inval;
+		return -EINVAL;
 
 	if (ipv4_is_zeronet(saddr)) {
 		if (!ipv4_is_local_multicast(daddr))
-			goto e_inval;
+			return -EINVAL;
 	} else {
 		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
-					  in_dev, &itag);
+					  in_dev, itag);
 		if (err < 0)
-			goto e_err;
+			return err;
 	}
+	return 0;
+}
+
+/* called in rcu_read_lock() section */
+static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+			     u8 tos, struct net_device *dev, int our)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	unsigned int flags = RTCF_MULTICAST;
+	struct rtable *rth;
+	u32 itag = 0;
+	int err;
+
+	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
+	if (err)
+		return err;
+
 	if (our)
 		flags |= RTCF_LOCAL;
 
 	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
 	if (!rth)
-		goto e_nobufs;
+		return -ENOBUFS;
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
@@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	skb_dst_set(skb, &rth->dst);
 	return 0;
-
-e_nobufs:
-	return -ENOBUFS;
-e_inval:
-	return -EINVAL;
-e_err:
-	return err;
 }
 
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9b30f821fe96..5676237d2b0f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2224,6 +2224,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
 int udp_v4_early_demux(struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
+	struct in_device *in_dev = NULL;
 	const struct iphdr *iph;
 	const struct udphdr *uh;
 	struct sock *sk = NULL;
@@ -2241,7 +2242,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
 
 	if (skb->pkt_type == PACKET_BROADCAST ||
 	    skb->pkt_type == PACKET_MULTICAST) {
-		struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+		in_dev = __in_dev_get_rcu(skb->dev);
 
 		if (!in_dev)
 			return 0;
@@ -2272,11 +2273,21 @@ int udp_v4_early_demux(struct sk_buff *skb)
 	if (dst)
 		dst = dst_check(dst, 0);
 	if (dst) {
+		u32 itag = 0;
+
 		/* set noref for now.
 		 * any place which wants to hold dst has to call
 		 * dst_hold_safe()
 		 */
 		skb_dst_set_noref(skb, dst);
+
+		/* for unconnected multicast sockets we need to validate
+		 * the source on each packet
+		 */
+		if (!inet_sk(sk)->inet_daddr && in_dev)
+			return ip_mc_validate_source(skb, iph->daddr,
+						     iph->saddr, iph->tos,
+						     skb->dev, in_dev, &itag);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 18 Oct 2017 07:10:36 -0700
Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region

SK_SKB BPF programs are run from the socket/tcp context but early in
the stack before much of the TCP metadata is needed in tcp_skb_cb. So
we can use some unused fields to place BPF metadata needed for SK_SKB
programs when implementing the redirect function.

This allows us to drop the preempt disable logic. It does however
require an API change so sk_redirect_map() has been updated to
additionally provide ctx_ptr to skb. Note, we do however continue to
disable/enable preemption around actual BPF program running to account
for map updates.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h                             |  2 +-
 include/net/tcp.h                                  |  5 ++++
 kernel/bpf/sockmap.c                               | 19 +++++++-------
 net/core/filter.c                                  | 29 +++++++++++-----------
 samples/sockmap/sockmap_kern.c                     |  2 +-
 tools/include/uapi/linux/bpf.h                     |  3 ++-
 tools/testing/selftests/bpf/bpf_helpers.h          |  2 +-
 tools/testing/selftests/bpf/sockmap_verdict_prog.c |  4 +--
 8 files changed, 36 insertions(+), 30 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index d29e58fde364..818a0b26249e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
 void bpf_warn_invalid_xdp_action(u32 act);
 void bpf_warn_invalid_xdp_redirect(u32 ifindex);
 
-struct sock *do_sk_redirect_map(void);
+struct sock *do_sk_redirect_map(struct sk_buff *skb);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 89974c5286d8..b1ef98ebce53 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -840,6 +840,11 @@ struct tcp_skb_cb {
 			struct inet6_skb_parm	h6;
 #endif
 		} header;	/* For incoming skbs */
+		struct {
+			__u32 key;
+			__u32 flags;
+			struct bpf_map *map;
+		} bpf;
 	};
 };
 
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index c68899d5b246..beaabb21c3a3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -39,6 +39,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <net/strparser.h>
+#include <net/tcp.h>
 
 struct bpf_stab {
 	struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 		return SK_DROP;
 
 	skb_orphan(skb);
+	/* We need to ensure that BPF metadata for maps is also cleared
+	 * when we orphan the skb so that we don't have the possibility
+	 * to reference a stale map.
+	 */
+	TCP_SKB_CB(skb)->bpf.map = NULL;
 	skb->sk = psock->sock;
 	bpf_compute_data_end(skb);
+	preempt_disable();
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
+	preempt_enable();
 	skb->sk = NULL;
 
 	return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 	struct sock *sk;
 	int rc;
 
-	/* Because we use per cpu values to feed input from sock redirect
-	 * in BPF program to do_sk_redirect_map() call we need to ensure we
-	 * are not preempted. RCU read lock is not sufficient in this case
-	 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
-	 */
-	preempt_disable();
 	rc = smap_verdict_func(psock, skb);
 	switch (rc) {
 	case SK_REDIRECT:
-		sk = do_sk_redirect_map();
-		preempt_enable();
+		sk = do_sk_redirect_map(skb);
 		if (likely(sk)) {
 			struct smap_psock *peer = smap_psock_sk(sk);
 
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 	/* Fall through and free skb otherwise */
 	case SK_DROP:
 	default:
-		if (rc != SK_REDIRECT)
-			preempt_enable();
 		kfree_skb(skb);
 	}
 }
diff --git a/net/core/filter.c b/net/core/filter.c
index 74b8c91fb5f4..ca1ba0bbfbc2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
+BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
+	   struct bpf_map *, map, u32, key, u64, flags)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
 	if (unlikely(flags))
 		return SK_ABORTED;
 
-	ri->ifindex = key;
-	ri->flags = flags;
-	ri->map = map;
+	tcb->bpf.key = key;
+	tcb->bpf.flags = flags;
+	tcb->bpf.map = map;
 
 	return SK_REDIRECT;
 }
 
-struct sock *do_sk_redirect_map(void)
+struct sock *do_sk_redirect_map(struct sk_buff *skb)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 	struct sock *sk = NULL;
 
-	if (ri->map) {
-		sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
+	if (tcb->bpf.map) {
+		sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
 
-		ri->ifindex = 0;
-		ri->map = NULL;
-		/* we do not clear flags for future lookup */
+		tcb->bpf.key = 0;
+		tcb->bpf.map = NULL;
 	}
 
 	return sk;
@@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
 	.func           = bpf_sk_redirect_map,
 	.gpl_only       = false,
 	.ret_type       = RET_INTEGER,
-	.arg1_type      = ARG_CONST_MAP_PTR,
-	.arg2_type      = ARG_ANYTHING,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
 	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
 };
 
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index f9b38ef82dc2..52b0053274f4 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb)
 		ret = 1;
 
 	bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
-	return bpf_sk_redirect_map(&sock_map, ret, 0);
+	return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
 }
 
 SEC("sockops")
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 43ab5c402f98..be9a631a69f7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -569,9 +569,10 @@ union bpf_attr {
  *     @flags: reserved for future use
  *     Return: 0 on success or negative error code
  *
- * int bpf_sk_redirect_map(map, key, flags)
+ * int bpf_sk_redirect_map(skb, map, key, flags)
  *     Redirect skb to a sock in map using key as a lookup key for the
  *     sock in map.
+ *     @skb: pointer to skb
  *     @map: pointer to sockmap
  *     @key: key to lookup sock in map
  *     @flags: reserved for future use
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 36fb9161b34a..b2e02bdcd098 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
 static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
 			     int optlen) =
 	(void *) BPF_FUNC_setsockopt;
-static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
+static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
 	(void *) BPF_FUNC_sk_redirect_map;
 static int (*bpf_sock_map_update)(void *map, void *key, void *value,
 				  unsigned long long flags) =
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
index 9b99bd10807d..2cd2d552938b 100644
--- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb)
 	bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);
 
 	if (!map)
-		return bpf_sk_redirect_map(&sock_map_rx, sk, 0);
-	return bpf_sk_redirect_map(&sock_map_tx, sk, 0);
+		return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
+	return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
 }
 
 char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From c92e8c02fe664155ac4234516e32544bec0f113d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 20 Oct 2017 09:04:13 -0700
Subject: tcp/dccp: fix ireq->opt races

syzkaller found another bug in DCCP/TCP stacks [1]

For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix
ireq->pktopts race"), we need to make sure we do not access
ireq->opt unless we own the request sock.

Note the opt field is renamed to ireq_opt to ease grep games.

[1]
BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295

CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x194/0x257 lib/dump_stack.c:52
 print_address_description+0x73/0x250 mm/kasan/report.c:252
 kasan_report_error mm/kasan/report.c:351 [inline]
 kasan_report+0x25b/0x340 mm/kasan/report.c:409
 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427
 ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
 tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135
 tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587
 tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557
 __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072
 tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline]
 tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071
 tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816
 tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682
 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
 NF_HOOK include/linux/netfilter.h:249 [inline]
 ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
 dst_input include/net/dst.h:464 [inline]
 ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
 NF_HOOK include/linux/netfilter.h:249 [inline]
 ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
 netif_receive_skb+0xae/0x390 net/core/dev.c:4611
 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
 call_write_iter include/linux/fs.h:1770 [inline]
 new_sync_write fs/read_write.c:468 [inline]
 __vfs_write+0x68a/0x970 fs/read_write.c:481
 vfs_write+0x18f/0x510 fs/read_write.c:543
 SYSC_write fs/read_write.c:588 [inline]
 SyS_write+0xef/0x220 fs/read_write.c:580
 entry_SYSCALL_64_fastpath+0x1f/0xbe
RIP: 0033:0x40c341
RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341
RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015
RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000
R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1
R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000

Allocated by task 3295:
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:447
 set_track mm/kasan/kasan.c:459 [inline]
 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
 __do_kmalloc mm/slab.c:3725 [inline]
 __kmalloc+0x162/0x760 mm/slab.c:3734
 kmalloc include/linux/slab.h:498 [inline]
 tcp_v4_save_options include/net/tcp.h:1962 [inline]
 tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271
 tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283
 tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313
 tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857
 tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482
 tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711
 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
 NF_HOOK include/linux/netfilter.h:249 [inline]
 ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
 dst_input include/net/dst.h:464 [inline]
 ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
 NF_HOOK include/linux/netfilter.h:249 [inline]
 ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
 netif_receive_skb+0xae/0x390 net/core/dev.c:4611
 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
 call_write_iter include/linux/fs.h:1770 [inline]
 new_sync_write fs/read_write.c:468 [inline]
 __vfs_write+0x68a/0x970 fs/read_write.c:481
 vfs_write+0x18f/0x510 fs/read_write.c:543
 SYSC_write fs/read_write.c:588 [inline]
 SyS_write+0xef/0x220 fs/read_write.c:580
 entry_SYSCALL_64_fastpath+0x1f/0xbe

Freed by task 3306:
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:447
 set_track mm/kasan/kasan.c:459 [inline]
 kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
 __cache_free mm/slab.c:3503 [inline]
 kfree+0xca/0x250 mm/slab.c:3820
 inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157
 __sk_destruct+0xfd/0x910 net/core/sock.c:1560
 sk_destruct+0x47/0x80 net/core/sock.c:1595
 __sk_free+0x57/0x230 net/core/sock.c:1603
 sk_free+0x2a/0x40 net/core/sock.c:1614
 sock_put include/net/sock.h:1652 [inline]
 inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959
 tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765
 tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675
 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
 NF_HOOK include/linux/netfilter.h:249 [inline]
 ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
 dst_input include/net/dst.h:464 [inline]
 ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
 NF_HOOK include/linux/netfilter.h:249 [inline]
 ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
 netif_receive_skb+0xae/0x390 net/core/dev.c:4611
 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
 call_write_iter include/linux/fs.h:1770 [inline]
 new_sync_write fs/read_write.c:468 [inline]
 __vfs_write+0x68a/0x970 fs/read_write.c:481
 vfs_write+0x18f/0x510 fs/read_write.c:543
 SYSC_write fs/read_write.c:588 [inline]
 SyS_write+0xef/0x220 fs/read_write.c:580
 entry_SYSCALL_64_fastpath+0x1f/0xbe

Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h         |  2 +-
 net/dccp/ipv4.c                 | 13 ++++++++-----
 net/ipv4/cipso_ipv4.c           | 24 +++++++-----------------
 net/ipv4/inet_connection_sock.c |  8 +++-----
 net/ipv4/syncookies.c           |  2 +-
 net/ipv4/tcp_input.c            |  2 +-
 net/ipv4/tcp_ipv4.c             | 22 +++++++++++++---------
 7 files changed, 34 insertions(+), 39 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index aa95053dfc78..425752f768d2 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -96,7 +96,7 @@ struct inet_request_sock {
 	kmemcheck_bitfield_end(flags);
 	u32                     ir_mark;
 	union {
-		struct ip_options_rcu	*opt;
+		struct ip_options_rcu __rcu	*ireq_opt;
 #if IS_ENABLED(CONFIG_IPV6)
 		struct {
 			struct ipv6_txoptions	*ipv6_opt;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 001c08696334..0490916864f9 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
 	sk_daddr_set(newsk, ireq->ir_rmt_addr);
 	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
 	newinet->inet_saddr	= ireq->ir_loc_addr;
-	newinet->inet_opt	= ireq->opt;
-	ireq->opt	   = NULL;
+	RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
 	newinet->mc_index  = inet_iif(skb);
 	newinet->mc_ttl	   = ip_hdr(skb)->ttl;
 	newinet->inet_id   = jiffies;
@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
 	if (__inet_inherit_port(sk, newsk) < 0)
 		goto put_and_exit;
 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-
+	if (*own_req)
+		ireq->ireq_opt = NULL;
+	else
+		newinet->inet_opt = NULL;
 	return newsk;
 
 exit_overflow:
@@ -441,6 +443,7 @@ exit:
 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return NULL;
 put_and_exit:
+	newinet->inet_opt = NULL;
 	inet_csk_prepare_forced_close(newsk);
 	dccp_done(newsk);
 	goto exit;
@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
 							      ireq->ir_rmt_addr);
 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 					    ireq->ir_rmt_addr,
-					    ireq->opt);
+					    rcu_dereference(ireq->ireq_opt));
 		err = net_xmit_eval(err);
 	}
 
@@ -548,7 +551,7 @@ out:
 static void dccp_v4_reqsk_destructor(struct request_sock *req)
 {
 	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
-	kfree(inet_rsk(req)->opt);
+	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
 }
 
 void dccp_syn_ack_timeout(const struct request_sock *req)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ae8f54cb321..82178cc69c96 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
 	buf = NULL;
 
 	req_inet = inet_rsk(req);
-	opt = xchg(&req_inet->opt, opt);
+	opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
 	if (opt)
 		kfree_rcu(opt, rcu);
 
@@ -1973,11 +1973,13 @@ req_setattr_failure:
  * values on failure.
  *
  */
-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
 {
+	struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
 	int hdr_delta = 0;
-	struct ip_options_rcu *opt = *opt_ptr;
 
+	if (!opt || opt->opt.cipso == 0)
+		return 0;
 	if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
 		u8 cipso_len;
 		u8 cipso_off;
@@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
  */
 void cipso_v4_sock_delattr(struct sock *sk)
 {
-	int hdr_delta;
-	struct ip_options_rcu *opt;
 	struct inet_sock *sk_inet;
+	int hdr_delta;
 
 	sk_inet = inet_sk(sk);
-	opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
-	if (!opt || opt->opt.cipso == 0)
-		return;
 
 	hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
 	if (sk_inet->is_icsk && hdr_delta > 0) {
@@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
  */
 void cipso_v4_req_delattr(struct request_sock *req)
 {
-	struct ip_options_rcu *opt;
-	struct inet_request_sock *req_inet;
-
-	req_inet = inet_rsk(req);
-	opt = req_inet->opt;
-	if (!opt || opt->opt.cipso == 0)
-		return;
-
-	cipso_v4_delopt(&req_inet->opt);
+	cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
 }
 
 /**
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 67aec7a10686..5ec9136a7c36 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -540,9 +540,10 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct net *net = read_pnet(&ireq->ireq_net);
-	struct ip_options_rcu *opt = ireq->opt;
+	struct ip_options_rcu *opt;
 	struct rtable *rt;
 
+	opt = rcu_dereference(ireq->ireq_opt);
 	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -576,10 +577,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 	struct flowi4 *fl4;
 	struct rtable *rt;
 
+	opt = rcu_dereference(ireq->ireq_opt);
 	fl4 = &newinet->cork.fl.u.ip4;
 
-	rcu_read_lock();
-	opt = rcu_dereference(newinet->inet_opt);
 	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -592,13 +592,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 		goto no_route;
 	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
 		goto route_err;
-	rcu_read_unlock();
 	return &rt->dst;
 
 route_err:
 	ip_rt_put(rt);
 no_route:
-	rcu_read_unlock();
 	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
 	return NULL;
 }
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b1bb1b3a1082..77cf32a80952 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
 	 */
-	ireq->opt = tcp_v4_save_options(sock_net(sk), skb);
+	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb));
 
 	if (security_inet_conn_request(sk, skb, req)) {
 		reqsk_free(req);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c5d7656beeee..7eec3383702b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6196,7 +6196,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 		struct inet_request_sock *ireq = inet_rsk(req);
 
 		kmemcheck_annotate_bitfield(ireq, flags);
-		ireq->opt = NULL;
+		ireq->ireq_opt = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
 		ireq->pktopts = NULL;
 #endif
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 85164d4d3e53..4c43365c374c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -877,7 +877,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 
 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 					    ireq->ir_rmt_addr,
-					    ireq->opt);
+					    rcu_dereference(ireq->ireq_opt));
 		err = net_xmit_eval(err);
 	}
 
@@ -889,7 +889,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
  */
 static void tcp_v4_reqsk_destructor(struct request_sock *req)
 {
-	kfree(inet_rsk(req)->opt);
+	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
 }
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -1265,10 +1265,11 @@ static void tcp_v4_init_req(struct request_sock *req,
 			    struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
+	struct net *net = sock_net(sk_listener);
 
 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
-	ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb);
+	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
 }
 
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1355,10 +1356,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 	sk_daddr_set(newsk, ireq->ir_rmt_addr);
 	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
 	newsk->sk_bound_dev_if = ireq->ir_iif;
-	newinet->inet_saddr	      = ireq->ir_loc_addr;
-	inet_opt	      = ireq->opt;
-	rcu_assign_pointer(newinet->inet_opt, inet_opt);
-	ireq->opt	      = NULL;
+	newinet->inet_saddr   = ireq->ir_loc_addr;
+	inet_opt	      = rcu_dereference(ireq->ireq_opt);
+	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
 	newinet->mc_index     = inet_iif(skb);
 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
 	newinet->rcv_tos      = ip_hdr(skb)->tos;
@@ -1403,9 +1403,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 	if (__inet_inherit_port(sk, newsk) < 0)
 		goto put_and_exit;
 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-	if (*own_req)
+	if (likely(*own_req)) {
 		tcp_move_syn(newtp, req);
-
+		ireq->ireq_opt = NULL;
+	} else {
+		newinet->inet_opt = NULL;
+	}
 	return newsk;
 
 exit_overflow:
@@ -1416,6 +1419,7 @@ exit:
 	tcp_listendrop(sk);
 	return NULL;
 put_and_exit:
+	newinet->inet_opt = NULL;
 	inet_csk_prepare_forced_close(newsk);
 	tcp_done(newsk);
 	goto exit;
-- 
cgit v1.2.3