150 files changed, 3800 insertions, 1976 deletions
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 42d0997e2fbb..8a8f77a247e6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -733,7 +733,7 @@ void bt_procfs_cleanup(struct net *net, const char *name)
 EXPORT_SYMBOL(bt_procfs_init);
 EXPORT_SYMBOL(bt_procfs_cleanup);
 
-static struct net_proto_family bt_sock_family_ops = {
+static const struct net_proto_family bt_sock_family_ops = {
 	.owner	= THIS_MODULE,
 	.family	= PF_BLUETOOTH,
 	.create	= bt_sock_create,
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index 24d4e60f8c48..c7b1a9aee579 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -89,11 +89,9 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
 	p.curve_id = ECC_CURVE_NIST_P256;
 	buf_len = crypto_ecdh_key_len(&p);
 	buf = kmalloc(buf_len, GFP_KERNEL);
-	if (!buf) {
-		pr_err("alg: kpp: Failed to allocate %d bytes for buf\n",
-		       buf_len);
+	if (!buf)
 		goto free_req;
-	}
+
 	crypto_ecdh_encode_key(buf, buf_len, &p);
 
 	/* Set A private Key */
@@ -170,11 +168,8 @@ bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32])
 	p.key_size = 32;
 	buf_len = crypto_ecdh_key_len(&p);
 	buf = kmalloc(buf_len, GFP_KERNEL);
-	if (!buf) {
-		pr_err("alg: kpp: Failed to allocate %d bytes for buf\n",
-		       buf_len);
+	if (!buf)
 		goto free_req;
-	}
 
 	do {
 		if (tries++ >= max_tries)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 05686776a5fb..7655b4005dfb 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -148,13 +148,13 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
 		return -EINVAL;
 
 	/* When the diagnostic flags are not persistent and the transport
-	 * is not active, then there is no need for the vendor callback.
-	 *
-	 * Instead just store the desired value. If needed the setting
-	 * will be programmed when the controller gets powered on.
+	 * is not active or in user channel operation, then there is no need
+	 * for the vendor callback. Instead just store the desired value and
+	 * the setting will be programmed when the controller gets powered on.
 	 */
 	if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
-	    !test_bit(HCI_RUNNING, &hdev->flags))
+	    (!test_bit(HCI_RUNNING, &hdev->flags) ||
+	     hci_dev_test_flag(hdev, HCI_USER_CHANNEL)))
 		goto done;
 
 	hci_req_sync_lock(hdev);
@@ -635,6 +635,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 						 * Report
 						 */
 
+		/* If the controller supports Channel Selection Algorithm #2
+		 * feature, enable the corresponding event.
+		 */
+		if (hdev->le_features[1] & HCI_LE_CHAN_SEL_ALG2)
+			events[2] |= 0x08;	/* LE Channel Selection
+						 * Algorithm
+						 */
+
 		/* If the controller supports the LE Set Scan Enable command,
 		 * enable the corresponding advertising report event.
 		 */
@@ -677,6 +685,12 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 		if (hdev->commands[34] & 0x04)
 			events[1] |= 0x01;	/* LE Generate DHKey Complete */
 
+		/* If the controller supports the LE Set Default PHY or
+		 * LE Set PHY commands, enable the corresponding event.
+		 */
+		if (hdev->commands[35] & (0x20 | 0x40))
+			events[1] |= 0x08;        /* LE PHY Update Complete */
+
 		hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events),
 			    events);
 
@@ -771,6 +785,27 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
 			    sizeof(support), &support);
 	}
 
+	/* Set Suggested Default Data Length to maximum if supported */
+	if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
+		struct hci_cp_le_write_def_data_len cp;
+
+		cp.tx_len = hdev->le_max_tx_len;
+		cp.tx_time = hdev->le_max_tx_time;
+		hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp);
+	}
+
+	/* Set Default PHY parameters if command is supported */
+	if (hdev->commands[35] & 0x20) {
+		struct hci_cp_le_set_default_phy cp;
+
+		/* No transmitter PHY or receiver PHY preferences */
+		cp.all_phys = 0x03;
+		cp.tx_phys = 0;
+		cp.rx_phys = 0;
+
+		hci_req_add(req, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp), &cp);
+	}
+
 	return 0;
 }
 
@@ -1384,6 +1419,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 	 * completed.
 	 */
 	if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
+	    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
 	    hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag)
 		ret = hdev->set_diag(hdev, true);
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 7f8d05cf9065..f3aef22931ab 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -138,7 +138,7 @@ void br_manage_promisc(struct net_bridge *br)
 	/* If vlan filtering is disabled or bridge interface is placed
 	 * into promiscuous mode, place all ports in promiscuous mode.
 	 */
-	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br))
+	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev))
 		set_all = true;
 
 	list_for_each_entry(p, &br->port_list, list) {
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index b0845480a3ae..09dcdb9c0f3c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -599,7 +599,7 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	vg = nbp_vlan_group(p);
-	if (br_vlan_enabled(br) && vg && entry->vid == 0) {
+	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
 			err = __br_mdb_add(net, br, entry);
@@ -694,7 +694,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	vg = nbp_vlan_group(p);
-	if (br_vlan_enabled(br) && vg && entry->vid == 0) {
+	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
 			err = __br_mdb_del(br, entry);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index faa7261a992f..8dc5c8d69bcd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2176,6 +2176,14 @@ unlock:
 	return err;
 }
 
+bool br_multicast_enabled(const struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	return !br->multicast_disabled;
+}
+EXPORT_SYMBOL_GPL(br_multicast_enabled);
+
 int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
 {
 	unsigned long max_delay;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 32bd3ead9ba1..3bcda556971e 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1251,7 +1251,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 	u32 ageing_time = jiffies_to_clock_t(br->ageing_time);
 	u32 stp_enabled = br->stp_enabled;
 	u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
-	u8 vlan_enabled = br_vlan_enabled(br);
+	u8 vlan_enabled = br_vlan_enabled(br->dev);
 	u64 clockval;
 
 	clockval = br_timer_value(&br->hello_timer);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0d177280aa84..20626927f433 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -854,10 +854,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
 	return vg->pvid;
 }
 
-static inline int br_vlan_enabled(struct net_bridge *br)
-{
-	return br->vlan_enabled;
-}
 #else
 static inline bool br_allowed_ingress(const struct net_bridge *br,
 				      struct net_bridge_vlan_group *vg,
@@ -945,11 +941,6 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
 	return 0;
 }
 
-static inline int br_vlan_enabled(struct net_bridge *br)
-{
-	return 0;
-}
-
 static inline int __br_vlan_filter_toggle(struct net_bridge *br,
 					  unsigned long val)
 {
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 6f12a5271219..89110319ef0f 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -150,7 +150,6 @@ static int br_stp_call_user(struct net_bridge *br, char *arg)
 
 static void br_stp_start(struct net_bridge *br)
 {
-	struct net_bridge_port *p;
 	int err = -ENOENT;
 
 	if (net_eq(dev_net(br->dev), &init_net))
@@ -169,11 +168,6 @@ static void br_stp_start(struct net_bridge *br)
 	if (!err) {
 		br->stp_enabled = BR_USER_STP;
 		br_debug(br, "userspace STP started\n");
-
-		/* Stop hello and hold timers */
-		del_timer(&br->hello_timer);
-		list_for_each_entry(p, &br->port_list, list)
-			del_timer(&p->hold_timer);
 	} else {
 		br->stp_enabled = BR_KERNEL_STP;
 		br_debug(br, "using kernel STP\n");
@@ -189,7 +183,6 @@ static void br_stp_start(struct net_bridge *br)
 
 static void br_stp_stop(struct net_bridge *br)
 {
-	struct net_bridge_port *p;
 	int err;
 
 	if (br->stp_enabled == BR_USER_STP) {
@@ -198,10 +191,6 @@ static void br_stp_stop(struct net_bridge *br)
 			br_err(br, "failed to stop userspace STP (%d)\n", err);
 
 		/* To start timers on any ports left in blocking */
-		mod_timer(&br->hello_timer, jiffies + br->hello_time);
-		list_for_each_entry(p, &br->port_list, list)
-			mod_timer(&p->hold_timer,
-				  round_jiffies(jiffies + BR_HOLD_TIME));
 		spin_lock_bh(&br->lock);
 		br_port_state_selection(br);
 		spin_unlock_bh(&br->lock);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index b838213c408e..26a1a56639b2 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -706,6 +706,14 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
 	return __br_vlan_filter_toggle(br, val);
 }
 
+bool br_vlan_enabled(const struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	return !!br->vlan_enabled;
+}
+EXPORT_SYMBOL_GPL(br_vlan_enabled);
+
 int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
 {
 	int err = 0;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 346ef6b00b8f..c16dd3a47fc6 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -111,7 +111,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
 	__wsum csum;
 	u8 proto;
 
-	if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
+	if (!nft_bridge_iphdr_validate(oldskb))
 		return;
 
 	/* IP header checks: fragment. */
@@ -226,9 +226,6 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
 	__be16 fo;
 	u8 proto = ip6h->nexthdr;
 
-	if (skb->csum_bad)
-		return false;
-
 	if (skb_csum_unnecessary(skb))
 		return true;
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index adcad344c843..4674d17e7c08 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1099,7 +1099,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 }
 
 
-static struct net_proto_family caif_family_ops = {
+static const struct net_proto_family caif_family_ops = {
 	.family = PF_CAIF,
 	.create = caif_create,
 	.owner = THIS_MODULE,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index db1866f2ffcf..bc46118486fe 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -161,6 +161,45 @@ done:
 	return skb;
 }
 
+struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
+					  struct sk_buff_head *queue,
+					  unsigned int flags,
+					  void (*destructor)(struct sock *sk,
+							   struct sk_buff *skb),
+					  int *peeked, int *off, int *err,
+					  struct sk_buff **last)
+{
+	struct sk_buff *skb;
+	int _off = *off;
+
+	*last = queue->prev;
+	skb_queue_walk(queue, skb) {
+		if (flags & MSG_PEEK) {
+			if (_off >= skb->len && (skb->len || _off ||
+						 skb->peeked)) {
+				_off -= skb->len;
+				continue;
+			}
+			if (!skb->len) {
+				skb = skb_set_peeked(skb);
+				if (unlikely(IS_ERR(skb))) {
+					*err = PTR_ERR(skb);
+					return NULL;
+				}
+			}
+			*peeked = 1;
+			atomic_inc(&skb->users);
+		} else {
+			__skb_unlink(skb, queue);
+			if (destructor)
+				destructor(sk, skb);
+		}
+		*off = _off;
+		return skb;
+	}
+	return NULL;
+}
+
 /**
  *	__skb_try_recv_datagram - Receive a datagram skbuff
  *	@sk: socket
@@ -222,40 +261,14 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 		 * Look at current nfs client by the way...
 		 * However, this function was correct in any case. 8)
 		 */
-		int _off = *off;
-
-		*last = (struct sk_buff *)queue;
 		spin_lock_irqsave(&queue->lock, cpu_flags);
-		skb_queue_walk(queue, skb) {
-			*last = skb;
-			if (flags & MSG_PEEK) {
-				if (_off >= skb->len && (skb->len || _off ||
-							 skb->peeked)) {
-					_off -= skb->len;
-					continue;
-				}
-				if (!skb->len) {
-					skb = skb_set_peeked(skb);
-					if (IS_ERR(skb)) {
-						error = PTR_ERR(skb);
-						spin_unlock_irqrestore(&queue->lock,
-								       cpu_flags);
-						goto no_packet;
-					}
-				}
-				*peeked = 1;
-				atomic_inc(&skb->users);
-			} else {
-				__skb_unlink(skb, queue);
-				if (destructor)
-					destructor(sk, skb);
-			}
-			spin_unlock_irqrestore(&queue->lock, cpu_flags);
-			*off = _off;
-			return skb;
-		}
-
+		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
+						peeked, off, &error, last);
 		spin_unlock_irqrestore(&queue->lock, cpu_flags);
+		if (error)
+			goto no_packet;
+		if (skb)
+			return skb;
 
 		if (!sk_can_busy_loop(sk))
 			break;
@@ -335,8 +348,8 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
 }
 EXPORT_SYMBOL(__skb_free_datagram_locked);
 
-int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
-			unsigned int flags,
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
+			struct sk_buff *skb, unsigned int flags,
 			void (*destructor)(struct sock *sk,
 					   struct sk_buff *skb))
 {
@@ -344,15 +357,15 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
 
 	if (flags & MSG_PEEK) {
 		err = -ENOENT;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
+		spin_lock_bh(&sk_queue->lock);
+		if (skb == skb_peek(sk_queue)) {
+			__skb_unlink(skb, sk_queue);
 			atomic_dec(&skb->users);
 			if (destructor)
 				destructor(sk, skb);
 			err = 0;
 		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		spin_unlock_bh(&sk_queue->lock);
 	}
 
 	atomic_inc(&sk->sk_drops);
@@ -383,7 +396,8 @@ EXPORT_SYMBOL(__sk_queue_drop_skb);
 
 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 {
-	int err = __sk_queue_drop_skb(sk, skb, flags, NULL);
+	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
+				      NULL);
 
 	kfree_skb(skb);
 	sk_mem_reclaim_partial(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index fca407b4a6ea..8f72f4a9c6ac 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -105,6 +105,7 @@
 #include <net/dst.h>
 #include <net/dst_metadata.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
 #include <linux/highmem.h>
@@ -142,6 +143,7 @@
 #include <linux/hrtimer.h>
 #include <linux/netfilter_ingress.h>
 #include <linux/crash_dump.h>
+#include <linux/sctp.h>
 
 #include "net-sysfs.h"
 
@@ -161,6 +163,7 @@ static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
 					 struct net_device *dev,
 					 struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -865,6 +868,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
+ *	dev_get_by_napi_id - find a device by napi_id
+ *	@napi_id: ID of the NAPI struct
+ *
+ *	Search for an interface by NAPI ID. Returns %NULL if the device
+ *	is not found or a pointer to the device. The device has not had
+ *	its reference counter increased so the caller must be careful
+ *	about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+	struct napi_struct *napi;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (napi_id < MIN_NAPI_ID)
+		return NULL;
+
+	napi = napi_by_id(napi_id);
+
+	return napi ? napi->dev : NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
+/**
  *	netdev_get_name - get a netdevice name, knowing its ifindex.
  *	@net: network namespace
  *	@name: a pointer to the buffer where the name will be stored.
@@ -2611,6 +2639,47 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
+int skb_crc32c_csum_help(struct sk_buff *skb)
+{
+	__le32 crc32c_csum;
+	int ret = 0, offset, start;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		goto out;
+
+	if (unlikely(skb_is_gso(skb)))
+		goto out;
+
+	/* Before computing a checksum, we should make sure no frag could
+	 * be modified by an external entity : checksum could be wrong.
+	 */
+	if (unlikely(skb_has_shared_frag(skb))) {
+		ret = __skb_linearize(skb);
+		if (ret)
+			goto out;
+	}
+	start = skb_checksum_start_offset(skb);
+	offset = start + offsetof(struct sctphdr, checksum);
+	if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (skb_cloned(skb) &&
+	    !skb_clone_writable(skb, offset + sizeof(__le32))) {
+		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+		if (ret)
+			goto out;
+	}
+	crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
+						  skb->len - start, ~(__u32)0,
+						  crc32c_csum_stub));
+	*(__le32 *)(skb->data + offset) = crc32c_csum;
+	skb->ip_summed = CHECKSUM_NONE;
+	skb->csum_not_inet = 0;
+out:
+	return ret;
+}
+
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
 	__be16 type = skb->protocol;
@@ -2953,6 +3022,17 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
 	return skb;
 }
 
+int skb_csum_hwoffload_help(struct sk_buff *skb,
+			    const netdev_features_t features)
+{
+	if (unlikely(skb->csum_not_inet))
+		return !!(features & NETIF_F_SCTP_CRC) ? 0 :
+			skb_crc32c_csum_help(skb);
+
+	return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
+}
+EXPORT_SYMBOL(skb_csum_hwoffload_help);
+
 static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
 {
 	netdev_features_t features;
@@ -2991,8 +3071,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 			else
 				skb_set_transport_header(skb,
 							 skb_checksum_start_offset(skb));
-			if (!(features & NETIF_F_CSUM_MASK) &&
-			    skb_checksum_help(skb))
+			if (skb_csum_hwoffload_help(skb, features))
 				goto out_kfree_skb;
 		}
 	}
@@ -3178,7 +3257,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 	/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
 	qdisc_bstats_cpu_update(cl->q, skb);
 
-	switch (tc_classify(skb, cl, &cl_res, false)) {
+	switch (tcf_classify(skb, cl, &cl_res, false)) {
 	case TC_ACT_OK:
 	case TC_ACT_RECLASSIFY:
 		skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -3190,6 +3269,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 		return NULL;
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
+	case TC_ACT_TRAP:
 		*ret = NET_XMIT_SUCCESS;
 		consume_skb(skb);
 		return NULL;
@@ -3948,7 +4028,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 	skb->tc_at_ingress = 1;
 	qdisc_bstats_cpu_update(cl->q, skb);
 
-	switch (tc_classify(skb, cl, &cl_res, false)) {
+	switch (tcf_classify(skb, cl, &cl_res, false)) {
 	case TC_ACT_OK:
 	case TC_ACT_RECLASSIFY:
 		skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -3959,6 +4039,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		return NULL;
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
+	case TC_ACT_TRAP:
 		consume_skb(skb);
 		return NULL;
 	case TC_ACT_REDIRECT:
@@ -4636,9 +4717,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (netif_elide_gro(skb->dev))
 		goto normal;
 
-	if (skb->csum_bad)
-		goto normal;
-
 	gro_list_prepare(napi, skb);
 
 	rcu_read_lock();
@@ -7008,7 +7086,7 @@ static void rollback_registered_many(struct list_head *head)
 
 		if (!dev->rtnl_link_ops ||
 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-			skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
+			skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
 						     GFP_KERNEL);
 
 		/*
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d293506..77f04e71100f 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -225,6 +225,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_NTP_ALL:
 		rx_filter_valid = 1;
 		break;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index a6bb95fa87b2..946f758d44f2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -352,7 +352,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
  *	bpf_convert_filter - convert filter program
  *	@prog: the user passed filter program
  *	@len: the length of the user passed filter program
- *	@new_prog: buffer where converted program will be stored
+ *	@new_prog: allocated 'struct bpf_prog' or NULL
  *	@new_len: pointer to store length of converted program
  *
  * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
@@ -364,14 +364,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
  *
  * 2) 2nd pass to remap in two passes: 1st pass finds new
  *    jump offsets, 2nd pass remapping:
- *   new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
  *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
  */
 static int bpf_convert_filter(struct sock_filter *prog, int len,
-			      struct bpf_insn *new_prog, int *new_len)
+			      struct bpf_prog *new_prog, int *new_len)
 {
-	int new_flen = 0, pass = 0, target, i;
-	struct bpf_insn *new_insn;
+	int new_flen = 0, pass = 0, target, i, stack_off;
+	struct bpf_insn *new_insn, *first_insn = NULL;
 	struct sock_filter *fp;
 	int *addrs = NULL;
 	u8 bpf_src;
@@ -383,6 +382,7 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
 		return -EINVAL;
 
 	if (new_prog) {
+		first_insn = new_prog->insnsi;
 		addrs = kcalloc(len, sizeof(*addrs),
 				GFP_KERNEL | __GFP_NOWARN);
 		if (!addrs)
@@ -390,11 +390,11 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
 	}
 
 do_pass:
-	new_insn = new_prog;
+	new_insn = first_insn;
 	fp = prog;
 
 	/* Classic BPF related prologue emission. */
-	if (new_insn) {
+	if (new_prog) {
 		/* Classic BPF expects A and X to be reset first. These need
 		 * to be guaranteed to be the first two instructions.
 		 */
@@ -415,7 +415,7 @@ do_pass:
 		struct bpf_insn *insn = tmp_insns;
 
 		if (addrs)
-			addrs[i] = new_insn - new_prog;
+			addrs[i] = new_insn - first_insn;
 
 		switch (fp->code) {
 		/* All arithmetic insns and skb loads map as-is. */
@@ -561,17 +561,25 @@ do_pass:
 		/* Store to stack. */
 		case BPF_ST:
 		case BPF_STX:
+			stack_off = fp->k * 4  + 4;
 			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
 					    BPF_ST ? BPF_REG_A : BPF_REG_X,
-					    -(BPF_MEMWORDS - fp->k) * 4);
+					    -stack_off);
+			/* check_load_and_stores() verifies that classic BPF can
+			 * load from stack only after write, so tracking
+			 * stack_depth for ST|STX insns is enough
+			 */
+			if (new_prog && new_prog->aux->stack_depth < stack_off)
+				new_prog->aux->stack_depth = stack_off;
 			break;
 
 		/* Load from stack. */
 		case BPF_LD | BPF_MEM:
 		case BPF_LDX | BPF_MEM:
+			stack_off = fp->k * 4  + 4;
 			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
 					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
-					    -(BPF_MEMWORDS - fp->k) * 4);
+					    -stack_off);
 			break;
 
 		/* A = K or X = K */
@@ -619,13 +627,13 @@ do_pass:
 
 	if (!new_prog) {
 		/* Only calculating new length. */
-		*new_len = new_insn - new_prog;
+		*new_len = new_insn - first_insn;
 		return 0;
 	}
 
 	pass++;
-	if (new_flen != new_insn - new_prog) {
-		new_flen = new_insn - new_prog;
+	if (new_flen != new_insn - first_insn) {
+		new_flen = new_insn - first_insn;
 		if (pass > 2)
 			goto err;
 		goto do_pass;
@@ -1017,7 +1025,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 	fp->len = new_len;
 
 	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
-	err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
+	err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
 	if (err)
 		/* 2nd bpf_convert_filter() can fail only if it fails
 		 * to allocate memory, remapping must succeed. Note,
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 28d94bce4df8..fc5fc4594c90 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -18,6 +18,7 @@
 #include <linux/stddef.h>
 #include <linux/if_ether.h>
 #include <linux/mpls.h>
+#include <linux/tcp.h>
 #include <net/flow_dissector.h>
 #include <scsi/fc/fc_fcoe.h>
 
@@ -342,6 +343,64 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
 	return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
 }
 
+static void
+__skb_flow_dissect_tcp(const struct sk_buff *skb,
+		       struct flow_dissector *flow_dissector,
+		       void *target_container, void *data, int thoff, int hlen)
+{
+	struct flow_dissector_key_tcp *key_tcp;
+	struct tcphdr *th, _th;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP))
+		return;
+
+	th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th);
+	if (!th)
+		return;
+
+	if (unlikely(__tcp_hdrlen(th) < sizeof(_th)))
+		return;
+
+	key_tcp = skb_flow_dissector_target(flow_dissector,
+					    FLOW_DISSECTOR_KEY_TCP,
+					    target_container);
+	key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF));
+}
+
+static void
+__skb_flow_dissect_ipv4(const struct sk_buff *skb,
+			struct flow_dissector *flow_dissector,
+			void *target_container, void *data, const struct iphdr *iph)
+{
+	struct flow_dissector_key_ip *key_ip;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
+		return;
+
+	key_ip = skb_flow_dissector_target(flow_dissector,
+					   FLOW_DISSECTOR_KEY_IP,
+					   target_container);
+	key_ip->tos = iph->tos;
+	key_ip->ttl = iph->ttl;
+}
+
+static void
+__skb_flow_dissect_ipv6(const struct sk_buff *skb,
+			struct flow_dissector *flow_dissector,
+			void *target_container, void *data, const struct ipv6hdr *iph)
+{
+	struct flow_dissector_key_ip *key_ip;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
+		return;
+
+	key_ip = skb_flow_dissector_target(flow_dissector,
+					   FLOW_DISSECTOR_KEY_IP,
+					   target_container);
+	key_ip->tos = ipv6_get_dsfield(iph);
+	key_ip->ttl = iph->hop_limit;
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -444,6 +503,9 @@ ip:
 			}
 		}
 
+		__skb_flow_dissect_ipv4(skb, flow_dissector,
+					target_container, data, iph);
+
 		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
 			goto out_good;
 
@@ -489,6 +551,9 @@ ipv6:
 				goto out_good;
 		}
 
+		__skb_flow_dissect_ipv6(skb, flow_dissector,
+					target_container, data, iph);
+
 		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
 			goto out_good;
 
@@ -683,6 +748,10 @@ ip_proto_again:
 	case IPPROTO_MPLS:
 		proto = htons(ETH_P_MPLS_UC);
 		goto mpls;
+	case IPPROTO_TCP:
+		__skb_flow_dissect_tcp(skb, flow_dissector, target_container,
+				       data, nhoff, hlen);
+		break;
 	default:
 		break;
 	}
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index b3bc0a31af9f..1307731ddfe4 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -240,7 +240,8 @@ static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
 
 static int bpf_build_state(struct nlattr *nla,
 			   unsigned int family, const void *cfg,
-			   struct lwtunnel_state **ts)
+			   struct lwtunnel_state **ts,
+			   struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[LWT_BPF_MAX + 1];
 	struct lwtunnel_state *newts;
@@ -250,7 +251,7 @@ static int bpf_build_state(struct nlattr *nla,
 	if (family != AF_INET && family != AF_INET6)
 		return -EAFNOSUPPORT;
 
-	ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL);
+	ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack);
 	if (ret < 0)
 		return ret;
 
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index cfae3d5fe11f..d9cb3532f1dd 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -103,37 +103,53 @@ EXPORT_SYMBOL(lwtunnel_encap_del_ops);
 
 int lwtunnel_build_state(u16 encap_type,
 			 struct nlattr *encap, unsigned int family,
-			 const void *cfg, struct lwtunnel_state **lws)
+			 const void *cfg, struct lwtunnel_state **lws,
+			 struct netlink_ext_ack *extack)
 {
 	const struct lwtunnel_encap_ops *ops;
+	bool found = false;
 	int ret = -EINVAL;
 
 	if (encap_type == LWTUNNEL_ENCAP_NONE ||
-	    encap_type > LWTUNNEL_ENCAP_MAX)
+	    encap_type > LWTUNNEL_ENCAP_MAX) {
+		NL_SET_ERR_MSG_ATTR(extack, encap,
+				    "Unknown LWT encapsulation type");
 		return ret;
+	}
 
 	ret = -EOPNOTSUPP;
 	rcu_read_lock();
 	ops = rcu_dereference(lwtun_encaps[encap_type]);
 	if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
-		ret = ops->build_state(encap, family, cfg, lws);
+		found = true;
+		ret = ops->build_state(encap, family, cfg, lws, extack);
 		if (ret)
 			module_put(ops->owner);
 	}
 	rcu_read_unlock();
 
+	/* don't rely on -EOPNOTSUPP to detect match as build_state
+	 * handlers could return it
+	 */
+	if (!found) {
+		NL_SET_ERR_MSG_ATTR(extack, encap,
+				    "LWT encapsulation type not supported");
+	}
+
 	return ret;
 }
 EXPORT_SYMBOL(lwtunnel_build_state);
 
-int lwtunnel_valid_encap_type(u16 encap_type)
+int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
 {
 	const struct lwtunnel_encap_ops *ops;
 	int ret = -EINVAL;
 
 	if (encap_type == LWTUNNEL_ENCAP_NONE ||
-	    encap_type > LWTUNNEL_ENCAP_MAX)
+	    encap_type > LWTUNNEL_ENCAP_MAX) {
+		NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
 		return ret;
+	}
 
 	rcu_read_lock();
 	ops = rcu_dereference(lwtun_encaps[encap_type]);
@@ -153,11 +169,16 @@ int lwtunnel_valid_encap_type(u16 encap_type)
 		}
 	}
 #endif
-	return ops ? 0 : -EOPNOTSUPP;
+	ret = ops ? 0 : -EOPNOTSUPP;
+	if (ret < 0)
+		NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");
+
+	return ret;
 }
 EXPORT_SYMBOL(lwtunnel_valid_encap_type);
 
-int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
+int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
+				   struct netlink_ext_ack *extack)
 {
 	struct rtnexthop *rtnh = (struct rtnexthop *)attr;
 	struct nlattr *nla_entype;
@@ -174,7 +195,8 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
 			if (nla_entype) {
 				encap_type = nla_get_u16(nla_entype);
 
-				if (lwtunnel_valid_encap_type(encap_type) != 0)
+				if (lwtunnel_valid_encap_type(encap_type,
+							      extack) != 0)
 					return -EOPNOTSUPP;
 			}
 		}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d274f81fcc2c..dadb5eef91c3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -118,6 +118,50 @@ unsigned long neigh_rand_reach_time(unsigned long base)
 EXPORT_SYMBOL(neigh_rand_reach_time);
 
 
+static bool neigh_del(struct neighbour *n, __u8 state,
+		      struct neighbour __rcu **np, struct neigh_table *tbl)
+{
+	bool retval = false;
+
+	write_lock(&n->lock);
+	if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
+		struct neighbour *neigh;
+
+		neigh = rcu_dereference_protected(n->next,
+						  lockdep_is_held(&tbl->lock));
+		rcu_assign_pointer(*np, neigh);
+		n->dead = 1;
+		retval = true;
+	}
+	write_unlock(&n->lock);
+	if (retval)
+		neigh_cleanup_and_release(n);
+	return retval;
+}
+
+bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
+{
+	struct neigh_hash_table *nht;
+	void *pkey = ndel->primary_key;
+	u32 hash_val;
+	struct neighbour *n;
+	struct neighbour __rcu **np;
+
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
+	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
+	hash_val = hash_val >> (32 - nht->hash_shift);
+
+	np = &nht->hash_buckets[hash_val];
+	while ((n = rcu_dereference_protected(*np,
+					      lockdep_is_held(&tbl->lock)))) {
+		if (n == ndel)
+			return neigh_del(n, 0, np, tbl);
+		np = &n->next;
+	}
+	return false;
+}
+
 static int neigh_forced_gc(struct neigh_table *tbl)
 {
 	int shrunk = 0;
@@ -140,19 +184,10 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 			 * - nobody refers to it.
 			 * - it is not permanent
 			 */
-			write_lock(&n->lock);
-			if (atomic_read(&n->refcnt) == 1 &&
-			    !(n->nud_state & NUD_PERMANENT)) {
-				rcu_assign_pointer(*np,
-					rcu_dereference_protected(n->next,
-						  lockdep_is_held(&tbl->lock)));
-				n->dead = 1;
-				shrunk	= 1;
-				write_unlock(&n->lock);
-				neigh_cleanup_and_release(n);
+			if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
+				shrunk = 1;
 				continue;
 			}
-			write_unlock(&n->lock);
 			np = &n->next;
 		}
 	}
@@ -1649,7 +1684,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
 			   NEIGH_UPDATE_F_OVERRIDE |
 			   NEIGH_UPDATE_F_ADMIN,
 			   NETLINK_CB(skb).portid);
+	write_lock_bh(&tbl->lock);
 	neigh_release(neigh);
+	neigh_remove_one(neigh, tbl);
+	write_unlock_bh(&tbl->lock);
 
 out:
 	return err;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 14d09345f00d..4847964931df 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -363,15 +363,10 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 
 	netif_addr_lock_bh(dev);
 	netdev_for_each_mc_addr(ha, dev) {
-		int i;
-
-		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
-			   dev->name, ha->refcount, ha->global_use);
-
-		for (i = 0; i < dev->addr_len; i++)
-			seq_printf(seq, "%02x", ha->addr[i]);
-
-		seq_putc(seq, '\n');
+		seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
+			   dev->ifindex, dev->name,
+			   ha->refcount, ha->global_use,
+			   (int)dev->addr_len, ha->addr);
 	}
 	netif_addr_unlock_bh(dev);
 	return 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 65ea0ff4017c..58e6cc70500d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -323,7 +323,11 @@ NETDEVICE_SHOW_RW(flags, fmt_hex);
 
 static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
 {
-	int res, orig_len = dev->tx_queue_len;
+	unsigned int orig_len = dev->tx_queue_len;
+	int res;
+
+	if (new_len != (unsigned int)new_len)
+		return -ERANGE;
 
 	if (new_len != orig_len) {
 		dev->tx_queue_len = new_len;
@@ -349,7 +353,7 @@ static ssize_t tx_queue_len_store(struct device *dev,
 
 	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
 }
-NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong);
+NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
 
 static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
 {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9e2c0a7cb325..7084f1db2446 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -941,6 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
 	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
 	       + rtnl_xdp_size() /* IFLA_XDP */
+	       + nla_total_size(4)  /* IFLA_EVENT */
 	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
 
 }
@@ -1282,9 +1283,40 @@ err_cancel:
 	return err;
 }
 
+static u32 rtnl_get_event(unsigned long event)
+{
+	u32 rtnl_event_type = IFLA_EVENT_NONE;
+
+	switch (event) {
+	case NETDEV_REBOOT:
+		rtnl_event_type = IFLA_EVENT_REBOOT;
+		break;
+	case NETDEV_FEAT_CHANGE:
+		rtnl_event_type = IFLA_EVENT_FEATURES;
+		break;
+	case NETDEV_BONDING_FAILOVER:
+		rtnl_event_type = IFLA_EVENT_BONDING_FAILOVER;
+		break;
+	case NETDEV_NOTIFY_PEERS:
+		rtnl_event_type = IFLA_EVENT_NOTIFY_PEERS;
+		break;
+	case NETDEV_RESEND_IGMP:
+		rtnl_event_type = IFLA_EVENT_IGMP_RESEND;
+		break;
+	case NETDEV_CHANGEINFODATA:
+		rtnl_event_type = IFLA_EVENT_BONDING_OPTIONS;
+		break;
+	default:
+		break;
+	}
+
+	return rtnl_event_type;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
-			    unsigned int flags, u32 ext_filter_mask)
+			    unsigned int flags, u32 ext_filter_mask,
+			    u32 event)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
@@ -1333,6 +1365,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	    nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
 		goto nla_put_failure;
 
+	if (event != IFLA_EVENT_NONE) {
+		if (nla_put_u32(skb, IFLA_EVENT, event))
+			goto nla_put_failure;
+	}
+
 	if (rtnl_fill_link_ifmap(skb, dev))
 		goto nla_put_failure;
 
@@ -1467,6 +1504,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINK_NETNSID]	= { .type = NLA_S32 },
 	[IFLA_PROTO_DOWN]	= { .type = NLA_U8 },
 	[IFLA_XDP]		= { .type = NLA_NESTED },
+	[IFLA_EVENT]		= { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1626,7 +1664,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 					       NETLINK_CB(cb->skb).portid,
 					       cb->nlh->nlmsg_seq, 0,
 					       flags,
-					       ext_filter_mask);
+					       ext_filter_mask, 0);
 
 			if (err < 0) {
 				if (likely(skb->len))
@@ -2048,8 +2086,8 @@ static int do_setlink(const struct sk_buff *skb,
 	}
 
 	if (tb[IFLA_TXQLEN]) {
-		unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]);
-		unsigned long orig_len = dev->tx_queue_len;
+		unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]);
+		unsigned int orig_len = dev->tx_queue_len;
 
 		if (dev->tx_queue_len ^ value) {
 			dev->tx_queue_len = value;
@@ -2736,7 +2774,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -ENOBUFS;
 
 	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
-			       nlh->nlmsg_seq, 0, 0, ext_filter_mask);
+			       nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
@@ -2808,7 +2846,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 }
 
 struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
-				       unsigned int change, gfp_t flags)
+				       unsigned int change,
+				       u32 event, gfp_t flags)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
@@ -2819,7 +2858,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
 	if (skb == NULL)
 		goto errout;
 
-	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
+	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -2840,18 +2879,25 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
 	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
 }
 
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
-		  gfp_t flags)
+static void rtmsg_ifinfo_event(int type, struct net_device *dev,
+			       unsigned int change, u32 event,
+			       gfp_t flags)
 {
 	struct sk_buff *skb;
 
 	if (dev->reg_state != NETREG_REGISTERED)
 		return;
 
-	skb = rtmsg_ifinfo_build_skb(type, dev, change, flags);
+	skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
 	if (skb)
 		rtmsg_ifinfo_send(skb, dev, flags);
 }
+
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+		  gfp_t flags)
+{
+	rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+}
 EXPORT_SYMBOL(rtmsg_ifinfo);
 
 static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
@@ -4168,7 +4214,8 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_RESEND_IGMP:
 	case NETDEV_CHANGEINFODATA:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
+		rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
+				   GFP_KERNEL);
 		break;
 	default:
 		break;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b1be7c01efe2..82cfc9c7a090 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2243,6 +2243,32 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 }
 EXPORT_SYMBOL(skb_copy_and_csum_bits);
 
+static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
+{
+	net_warn_ratelimited(
+		"%s: attempt to compute crc32c without libcrc32c.ko\n",
+		__func__);
+	return 0;
+}
+
+static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
+				       int offset, int len)
+{
+	net_warn_ratelimited(
+		"%s: attempt to compute crc32c without libcrc32c.ko\n",
+		__func__);
+	return 0;
+}
+
+static const struct skb_checksum_ops default_crc32c_ops = {
+	.update  = warn_crc32c_csum_update,
+	.combine = warn_crc32c_csum_combine,
+};
+
+const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
+	&default_crc32c_ops;
+EXPORT_SYMBOL(crc32c_csum_stub);
+
  /**
  *	skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
  *	@from: source buffer
@@ -3482,24 +3508,18 @@ void __init skb_init(void)
 						NULL);
 }
 
-/**
- *	skb_to_sgvec - Fill a scatter-gather list from a socket buffer
- *	@skb: Socket buffer containing the buffers to be mapped
- *	@sg: The scatter-gather list to map into
- *	@offset: The offset into the buffer's contents to start mapping
- *	@len: Length of buffer space to be mapped
- *
- *	Fill the specified scatter-gather list with mappings/pointers into a
- *	region of the buffer space attached to a socket buffer.
- */
 static int
-__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len,
+	       unsigned int recursion_level)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
 	struct sk_buff *frag_iter;
 	int elt = 0;
 
+	if (unlikely(recursion_level >= 24))
+		return -EMSGSIZE;
+
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
@@ -3518,6 +3538,8 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
 		if ((copy = end - offset) > 0) {
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			if (unlikely(elt && sg_is_last(&sg[elt - 1])))
+				return -EMSGSIZE;
 
 			if (copy > len)
 				copy = len;
@@ -3532,16 +3554,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	}
 
 	skb_walk_frags(skb, frag_iter) {
-		int end;
+		int end, ret;
 
 		WARN_ON(start > offset + len);
 
 		end = start + frag_iter->len;
 		if ((copy = end - offset) > 0) {
+			if (unlikely(elt && sg_is_last(&sg[elt - 1])))
+				return -EMSGSIZE;
+
 			if (copy > len)
 				copy = len;
-			elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
-					      copy);
+			ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start,
+					      copy, recursion_level + 1);
+			if (unlikely(ret < 0))
+				return ret;
+			elt += ret;
 			if ((len -= copy) == 0)
 				return elt;
 			offset += copy;
@@ -3552,6 +3580,31 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	return elt;
 }
 
+/**
+ *	skb_to_sgvec - Fill a scatter-gather list from a socket buffer
+ *	@skb: Socket buffer containing the buffers to be mapped
+ *	@sg: The scatter-gather list to map into
+ *	@offset: The offset into the buffer's contents to start mapping
+ *	@len: Length of buffer space to be mapped
+ *
+ *	Fill the specified scatter-gather list with mappings/pointers into a
+ *	region of the buffer space attached to a socket buffer. Returns either
+ *	the number of scatterlist items used, or -EMSGSIZE if the contents
+ *	could not fit.
+ */
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int nsg = __skb_to_sgvec(skb, sg, offset, len, 0);
+
+	if (nsg <= 0)
+		return nsg;
+
+	sg_mark_end(&sg[nsg - 1]);
+
+	return nsg;
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+
 /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given
  * sglist without mark the sg which contain last skb data as the end.
  * So the caller can mannipulate sg list as will when padding new data after
@@ -3574,19 +3627,11 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
 			int offset, int len)
 {
-	return __skb_to_sgvec(skb, sg, offset, len);
+	return __skb_to_sgvec(skb, sg, offset, len, 0);
 }
 EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
 
-int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
-{
-	int nsg = __skb_to_sgvec(skb, sg, offset, len);
 
-	sg_mark_end(&sg[nsg - 1]);
-
-	return nsg;
-}
-EXPORT_SYMBOL_GPL(skb_to_sgvec);
 
 /**
  *	skb_cow_data - Check that a socket buffer's data buffers are writable
@@ -3878,6 +3923,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 	if (!sk)
 		return;
 
+	if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+	    skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+		return;
+
 	tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
 	if (!skb_may_tx_timestamp(sk, tsonly))
 		return;
diff --git a/net/core/sock.c b/net/core/sock.c
index 727f924b7f91..bef844127e01 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1038,6 +1038,10 @@ set_rcvbuf:
 #endif
 
 	case SO_MAX_PACING_RATE:
+		if (val != ~0U)
+			cmpxchg(&sk->sk_pacing_status,
+				SK_PACING_NONE,
+				SK_PACING_NEEDED);
 		sk->sk_max_pacing_rate = val;
 		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
 					 sk->sk_max_pacing_rate);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 93106120f987..733f523707ac 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -178,10 +178,6 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
 	[DCB_ATTR_IEEE_QCN_STATS]   = {.len = sizeof(struct ieee_qcn_stats)},
 };
 
-static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
-	[DCB_ATTR_IEEE_APP]	    = {.len = sizeof(struct dcb_app)},
-};
-
 /* DCB number of traffic classes nested attributes. */
 static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
 	[DCB_FEATCFG_ATTR_ALL]      = {.type = NLA_FLAG},
@@ -1463,8 +1459,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
 
 		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
 			struct dcb_app *app_data;
+
 			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
 				continue;
+
+			if (nla_len(attr) < sizeof(struct dcb_app)) {
+				err = -ERANGE;
+				goto err;
+			}
+
 			app_data = nla_data(attr);
 			if (ops->ieee_setapp)
 				err = ops->ieee_setapp(netdev, app_data);
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 5e3a7302f774..e1295d5f2c56 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -233,7 +233,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-	const u32 now = ccid2_time_stamp;
+	const u32 now = ccid2_jiffies32;
 	struct ccid2_seq *next;
 
 	/* slow-start after idle periods (RFC 2581, RFC 2861) */
@@ -466,7 +466,7 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
 	 * The cleanest solution is to not use the ccid2s_sent field at all
 	 * and instead use DCCP timestamps: requires changes in other places.
 	 */
-	ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
+	ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent);
 }
 
 static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
@@ -478,7 +478,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 		return;
 	}
 
-	hc->tx_last_cong = ccid2_time_stamp;
+	hc->tx_last_cong = ccid2_jiffies32;
 
 	hc->tx_cwnd      = hc->tx_cwnd / 2 ? : 1U;
 	hc->tx_ssthresh  = max(hc->tx_cwnd, 2U);
@@ -731,7 +731,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 
 	hc->tx_rto	 = DCCP_TIMEOUT_INIT;
 	hc->tx_rpdupack  = -1;
-	hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp;
+	hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
 	hc->tx_cwnd_used = 0;
 	setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
 			(unsigned long)sk);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 18c97543e522..6e50ef2898fb 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -27,7 +27,7 @@
  * CCID-2 timestamping faces the same issues as TCP timestamping.
  * Hence we reuse/share as much of the code as possible.
  */
-#define ccid2_time_stamp	tcp_time_stamp
+#define ccid2_jiffies32	((u32)jiffies)
 
 /* NUMDUPACK parameter from RFC 4341, p. 6 */
 #define NUMDUPACK	3
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 81a0868edb1d..cc5f8f971689 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -25,16 +25,19 @@ config NET_DSA_TAG_DSA
 config NET_DSA_TAG_EDSA
 	bool
 
-config NET_DSA_TAG_TRAILER
+config NET_DSA_TAG_KSZ
 	bool
 
-config NET_DSA_TAG_QCA
+config NET_DSA_TAG_LAN9303
 	bool
 
 config NET_DSA_TAG_MTK
 	bool
 
-config NET_DSA_TAG_LAN9303
+config NET_DSA_TAG_TRAILER
+	bool
+
+config NET_DSA_TAG_QCA
 	bool
 
 endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 0b747d75e65a..fcce25da937c 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,12 +1,13 @@
 # the core
 obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o
+dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o
 
 # tagging formats
 dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
 dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
 dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
-dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
-dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
-dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
+dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
 dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
+dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
+dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
+dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 90038d45a547..517215391514 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -24,7 +24,7 @@
 #include <linux/phy_fixed.h>
 #include <linux/gpio/consumer.h>
 #include <linux/etherdevice.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
@@ -40,26 +40,29 @@ static const struct dsa_device_ops none_ops = {
 };
 
 const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+	[DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
+#endif
 #ifdef CONFIG_NET_DSA_TAG_DSA
 	[DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
 #endif
 #ifdef CONFIG_NET_DSA_TAG_EDSA
 	[DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
 #endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-	[DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_BRCM
-	[DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_KSZ
+	[DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
 #endif
-#ifdef CONFIG_NET_DSA_TAG_QCA
-	[DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_LAN9303
+	[DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
 #endif
 #ifdef CONFIG_NET_DSA_TAG_MTK
 	[DSA_TAG_PROTO_MTK] = &mtk_netdev_ops,
 #endif
-#ifdef CONFIG_NET_DSA_TAG_LAN9303
-	[DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_QCA
+	[DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+	[DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops,
 #endif
 	[DSA_TAG_PROTO_NONE] = &none_ops,
 };
@@ -109,8 +112,9 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
 	return ops;
 }
 
-int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds)
+int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp)
 {
+	struct dsa_switch *ds = cpu_dp->ds;
 	struct net_device *master;
 	struct ethtool_ops *cpu_ops;
 
@@ -133,8 +137,9 @@ int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds)
 	return 0;
 }
 
-void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds)
+void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp)
 {
+	struct dsa_switch *ds = cpu_dp->ds;
 	struct net_device *master;
 
 	master = ds->dst->master_netdev;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 7796580e99ee..f88e1dddb74a 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -18,7 +18,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/of.h>
 #include <linux/of_net.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 static LIST_HEAD(dsa_switch_trees);
@@ -214,66 +214,59 @@ static int dsa_dst_complete(struct dsa_switch_tree *dst)
 	return 0;
 }
 
-static int dsa_dsa_port_apply(struct dsa_port *port, u32 index,
-			      struct dsa_switch *ds)
+static int dsa_dsa_port_apply(struct dsa_port *port)
 {
+	struct dsa_switch *ds = port->ds;
 	int err;
 
-	err = dsa_cpu_dsa_setup(ds, ds->dev, port, index);
+	err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
 	if (err) {
 		dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
-			 index, err);
+			 port->index, err);
 		return err;
 	}
 
-	memset(&ds->ports[index].devlink_port, 0,
-	       sizeof(ds->ports[index].devlink_port));
+	memset(&port->devlink_port, 0, sizeof(port->devlink_port));
 
-	return devlink_port_register(ds->devlink,
-				     &ds->ports[index].devlink_port,
-				     index);
+	return devlink_port_register(ds->devlink, &port->devlink_port,
+				     port->index);
 }
 
-static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index,
-				 struct dsa_switch *ds)
+static void dsa_dsa_port_unapply(struct dsa_port *port)
 {
-	devlink_port_unregister(&ds->ports[index].devlink_port);
+	devlink_port_unregister(&port->devlink_port);
 	dsa_cpu_dsa_destroy(port);
 }
 
-static int dsa_cpu_port_apply(struct dsa_port *port, u32 index,
-			      struct dsa_switch *ds)
+static int dsa_cpu_port_apply(struct dsa_port *port)
 {
+	struct dsa_switch *ds = port->ds;
 	int err;
 
-	err = dsa_cpu_dsa_setup(ds, ds->dev, port, index);
+	err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
 	if (err) {
 		dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
-			 index, err);
+			 port->index, err);
 		return err;
 	}
 
-	ds->cpu_port_mask |= BIT(index);
-
-	memset(&ds->ports[index].devlink_port, 0,
-	       sizeof(ds->ports[index].devlink_port));
-	err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
-				    index);
+	memset(&port->devlink_port, 0, sizeof(port->devlink_port));
+	err = devlink_port_register(ds->devlink, &port->devlink_port,
+				    port->index);
 	return err;
 }
 
-static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index,
-				 struct dsa_switch *ds)
+static void dsa_cpu_port_unapply(struct dsa_port *port)
 {
-	devlink_port_unregister(&ds->ports[index].devlink_port);
+	devlink_port_unregister(&port->devlink_port);
 	dsa_cpu_dsa_destroy(port);
-	ds->cpu_port_mask &= ~BIT(index);
+	port->ds->cpu_port_mask &= ~BIT(port->index);
 
 }
 
-static int dsa_user_port_apply(struct dsa_port *port, u32 index,
-			       struct dsa_switch *ds)
+static int dsa_user_port_apply(struct dsa_port *port)
 {
+	struct dsa_switch *ds = port->ds;
 	const char *name = port->name;
 	int err;
 
@@ -282,35 +275,32 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index,
 	if (!name)
 		name = "eth%d";
 
-	err = dsa_slave_create(ds, ds->dev, index, name);
+	err = dsa_slave_create(ds, ds->dev, port->index, name);
 	if (err) {
 		dev_warn(ds->dev, "Failed to create slave %d: %d\n",
-			 index, err);
-		ds->ports[index].netdev = NULL;
+			 port->index, err);
+		port->netdev = NULL;
 		return err;
 	}
 
-	memset(&ds->ports[index].devlink_port, 0,
-	       sizeof(ds->ports[index].devlink_port));
-	err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
-				    index);
+	memset(&port->devlink_port, 0, sizeof(port->devlink_port));
+	err = devlink_port_register(ds->devlink, &port->devlink_port,
+				    port->index);
 	if (err)
 		return err;
 
-	devlink_port_type_eth_set(&ds->ports[index].devlink_port,
-				  ds->ports[index].netdev);
+	devlink_port_type_eth_set(&port->devlink_port, port->netdev);
 
 	return 0;
 }
 
-static void dsa_user_port_unapply(struct dsa_port *port, u32 index,
-				  struct dsa_switch *ds)
+static void dsa_user_port_unapply(struct dsa_port *port)
 {
-	devlink_port_unregister(&ds->ports[index].devlink_port);
-	if (ds->ports[index].netdev) {
-		dsa_slave_destroy(ds->ports[index].netdev);
-		ds->ports[index].netdev = NULL;
-		ds->enabled_port_mask &= ~(1 << index);
+	devlink_port_unregister(&port->devlink_port);
+	if (port->netdev) {
+		dsa_slave_destroy(port->netdev);
+		port->netdev = NULL;
+		port->ds->enabled_port_mask &= ~(1 << port->index);
 	}
 }
 
@@ -370,20 +360,20 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
 			continue;
 
 		if (dsa_port_is_dsa(port)) {
-			err = dsa_dsa_port_apply(port, index, ds);
+			err = dsa_dsa_port_apply(port);
 			if (err)
 				return err;
 			continue;
 		}
 
 		if (dsa_port_is_cpu(port)) {
-			err = dsa_cpu_port_apply(port, index, ds);
+			err = dsa_cpu_port_apply(port);
 			if (err)
 				return err;
 			continue;
 		}
 
-		err = dsa_user_port_apply(port, index, ds);
+		err = dsa_user_port_apply(port);
 		if (err)
 			continue;
 	}
@@ -402,16 +392,16 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
 			continue;
 
 		if (dsa_port_is_dsa(port)) {
-			dsa_dsa_port_unapply(port, index, ds);
+			dsa_dsa_port_unapply(port);
 			continue;
 		}
 
 		if (dsa_port_is_cpu(port)) {
-			dsa_cpu_port_unapply(port, index, ds);
+			dsa_cpu_port_unapply(port);
 			continue;
 		}
 
-		dsa_user_port_unapply(port, index, ds);
+		dsa_user_port_unapply(port);
 	}
 
 	if (ds->slave_mii_bus && ds->ops->phy_read)
@@ -443,8 +433,8 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
 			return err;
 	}
 
-	if (dst->cpu_switch) {
-		err = dsa_cpu_port_ethtool_setup(dst->cpu_switch);
+	if (dst->cpu_dp) {
+		err = dsa_cpu_port_ethtool_setup(dst->cpu_dp);
 		if (err)
 			return err;
 	}
@@ -454,7 +444,7 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
 	 * sent to the tag format's receive function.
 	 */
 	wmb();
-	dst->master_netdev->dsa_ptr = (void *)dst;
+	dst->master_netdev->dsa_ptr = dst;
 	dst->applied = true;
 
 	return 0;
@@ -484,9 +474,9 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
 		dsa_ds_unapply(dst, ds);
 	}
 
-	if (dst->cpu_switch) {
-		dsa_cpu_port_ethtool_restore(dst->cpu_switch);
-		dst->cpu_switch = NULL;
+	if (dst->cpu_dp) {
+		dsa_cpu_port_ethtool_restore(dst->cpu_dp);
+		dst->cpu_dp = NULL;
 	}
 
 	pr_info("DSA: tree %d unapplied\n", dst->tree);
@@ -520,10 +510,8 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
 	if (!dst->master_netdev)
 		dst->master_netdev = ethernet_dev;
 
-	if (!dst->cpu_switch) {
-		dst->cpu_switch = ds;
-		dst->cpu_port = index;
-	}
+	if (!dst->cpu_dp)
+		dst->cpu_dp = port;
 
 	tag_protocol = ds->ops->get_tag_protocol(ds);
 	dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
@@ -534,6 +522,12 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
 
 	dst->rcv = dst->tag_ops->rcv;
 
+	/* Initialize cpu_port_mask now for drv->setup()
+	 * to have access to a correct value, just like what
+	 * net/dsa/dsa.c::dsa_switch_setup_one does.
+	 */
+	ds->cpu_port_mask |= BIT(index);
+
 	return 0;
 }
 
@@ -545,14 +539,22 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds)
 
 	for (index = 0; index < ds->num_ports; index++) {
 		port = &ds->ports[index];
-		if (!dsa_port_is_valid(port))
+		if (!dsa_port_is_valid(port) ||
+		    dsa_port_is_dsa(port))
 			continue;
 
 		if (dsa_port_is_cpu(port)) {
 			err = dsa_cpu_parse(port, index, dst, ds);
 			if (err)
 				return err;
+		} else {
+			/* Initialize enabled_port_mask now for drv->setup()
+			 * to have access to a correct value, just like what
+			 * net/dsa/dsa.c::dsa_switch_setup_one does.
+			 */
+			ds->enabled_port_mask |= BIT(index);
 		}
+
 	}
 
 	pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index);
@@ -601,13 +603,6 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
 			return -EINVAL;
 
 		ds->ports[reg].dn = port;
-
-		/* Initialize enabled_port_mask now for ops->setup()
-		 * to have access to a correct value, just like what
-		 * net/dsa/dsa.c::dsa_switch_setup_one does.
-		 */
-		if (!dsa_port_is_cpu(&ds->ports[reg]))
-			ds->enabled_port_mask |= 1 << reg;
 	}
 
 	return 0;
@@ -623,14 +618,6 @@ static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds)
 			continue;
 
 		ds->ports[i].name = cd->port_names[i];
-
-		/* Initialize enabled_port_mask now for drv->setup()
-		 * to have access to a correct value, just like what
-		 * net/dsa/dsa.c::dsa_switch_setup_one does.
-		 */
-		if (!dsa_port_is_cpu(&ds->ports[i]))
-			ds->enabled_port_mask |= 1 << i;
-
 		valid_name_found = true;
 	}
 
@@ -690,10 +677,10 @@ static struct device_node *dsa_get_ports(struct dsa_switch *ds,
 	return ports;
 }
 
-static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev)
+static int _dsa_register_switch(struct dsa_switch *ds)
 {
-	struct dsa_chip_data *pdata = dev->platform_data;
-	struct device_node *np = dev->of_node;
+	struct dsa_chip_data *pdata = ds->dev->platform_data;
+	struct device_node *np = ds->dev->of_node;
 	struct dsa_switch_tree *dst;
 	struct device_node *ports;
 	u32 tree, index;
@@ -807,12 +794,12 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
 }
 EXPORT_SYMBOL_GPL(dsa_switch_alloc);
 
-int dsa_register_switch(struct dsa_switch *ds, struct device *dev)
+int dsa_register_switch(struct dsa_switch *ds)
 {
 	int err;
 
 	mutex_lock(&dsa2_mutex);
-	err = _dsa_register_switch(ds, dev);
+	err = _dsa_register_switch(ds);
 	mutex_unlock(&dsa2_mutex);
 
 	return err;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index f4a88e485213..66ee248796c8 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -14,6 +14,56 @@
 #include <linux/phy.h>
 #include <linux/netdevice.h>
 #include <linux/netpoll.h>
+#include <net/dsa.h>
+
+enum {
+	DSA_NOTIFIER_AGEING_TIME,
+	DSA_NOTIFIER_BRIDGE_JOIN,
+	DSA_NOTIFIER_BRIDGE_LEAVE,
+	DSA_NOTIFIER_FDB_ADD,
+	DSA_NOTIFIER_FDB_DEL,
+	DSA_NOTIFIER_MDB_ADD,
+	DSA_NOTIFIER_MDB_DEL,
+	DSA_NOTIFIER_VLAN_ADD,
+	DSA_NOTIFIER_VLAN_DEL,
+};
+
+/* DSA_NOTIFIER_AGEING_TIME */
+struct dsa_notifier_ageing_time_info {
+	struct switchdev_trans *trans;
+	unsigned int ageing_time;
+};
+
+/* DSA_NOTIFIER_BRIDGE_* */
+struct dsa_notifier_bridge_info {
+	struct net_device *br;
+	int sw_index;
+	int port;
+};
+
+/* DSA_NOTIFIER_FDB_* */
+struct dsa_notifier_fdb_info {
+	const struct switchdev_obj_port_fdb *fdb;
+	struct switchdev_trans *trans;
+	int sw_index;
+	int port;
+};
+
+/* DSA_NOTIFIER_MDB_* */
+struct dsa_notifier_mdb_info {
+	const struct switchdev_obj_port_mdb *mdb;
+	struct switchdev_trans *trans;
+	int sw_index;
+	int port;
+};
+
+/* DSA_NOTIFIER_VLAN_* */
+struct dsa_notifier_vlan_info {
+	const struct switchdev_obj_port_vlan *vlan;
+	struct switchdev_trans *trans;
+	int sw_index;
+	int port;
+};
 
 struct dsa_device_ops {
 	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -23,6 +73,7 @@ struct dsa_device_ops {
 };
 
 struct dsa_slave_priv {
+	/* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */
 	struct sk_buff *	(*xmit)(struct sk_buff *skb,
 					struct net_device *dev);
 
@@ -52,13 +103,46 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
 		      struct dsa_port *dport, int port);
 void dsa_cpu_dsa_destroy(struct dsa_port *dport);
 const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
-int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds);
-void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds);
+int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
+void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
 
 /* legacy.c */
 int dsa_legacy_register(void);
 void dsa_legacy_unregister(void);
 
+/* port.c */
+int dsa_port_set_state(struct dsa_port *dp, u8 state,
+		       struct switchdev_trans *trans);
+void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
+void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
+int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+			    struct switchdev_trans *trans);
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
+			 struct switchdev_trans *trans);
+int dsa_port_fdb_add(struct dsa_port *dp,
+		     const struct switchdev_obj_port_fdb *fdb,
+		     struct switchdev_trans *trans);
+int dsa_port_fdb_del(struct dsa_port *dp,
+		     const struct switchdev_obj_port_fdb *fdb);
+int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
+		      switchdev_obj_dump_cb_t *cb);
+int dsa_port_mdb_add(struct dsa_port *dp,
+		     const struct switchdev_obj_port_mdb *mdb,
+		     struct switchdev_trans *trans);
+int dsa_port_mdb_del(struct dsa_port *dp,
+		     const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
+		      switchdev_obj_dump_cb_t *cb);
+int dsa_port_vlan_add(struct dsa_port *dp,
+		      const struct switchdev_obj_port_vlan *vlan,
+		      struct switchdev_trans *trans);
+int dsa_port_vlan_del(struct dsa_port *dp,
+		      const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_vlan_dump(struct dsa_port *dp,
+		       struct switchdev_obj_port_vlan *vlan,
+		       switchdev_obj_dump_cb_t *cb);
+
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
@@ -75,25 +159,28 @@ void dsa_slave_unregister_notifier(void);
 int dsa_switch_register_notifier(struct dsa_switch *ds);
 void dsa_switch_unregister_notifier(struct dsa_switch *ds);
 
+/* tag_brcm.c */
+extern const struct dsa_device_ops brcm_netdev_ops;
+
 /* tag_dsa.c */
 extern const struct dsa_device_ops dsa_netdev_ops;
 
 /* tag_edsa.c */
 extern const struct dsa_device_ops edsa_netdev_ops;
 
-/* tag_trailer.c */
-extern const struct dsa_device_ops trailer_netdev_ops;
-
-/* tag_brcm.c */
-extern const struct dsa_device_ops brcm_netdev_ops;
+/* tag_ksz.c */
+extern const struct dsa_device_ops ksz_netdev_ops;
 
-/* tag_qca.c */
-extern const struct dsa_device_ops qca_netdev_ops;
+/* tag_lan9303.c */
+extern const struct dsa_device_ops lan9303_netdev_ops;
 
 /* tag_mtk.c */
 extern const struct dsa_device_ops mtk_netdev_ops;
 
-/* tag_lan9303.c */
-extern const struct dsa_device_ops lan9303_netdev_ops;
+/* tag_qca.c */
+extern const struct dsa_device_ops qca_netdev_ops;
+
+/* tag_trailer.c */
+extern const struct dsa_device_ops trailer_netdev_ops;
 
 #endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 7281098df04e..3a56de8f51a8 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -22,7 +22,7 @@
 #include <linux/sysfs.h>
 #include <linux/phy_fixed.h>
 #include <linux/etherdevice.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 /* switch driver registration ***********************************************/
@@ -115,13 +115,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 			continue;
 
 		if (!strcmp(name, "cpu")) {
-			if (dst->cpu_switch) {
+			if (dst->cpu_dp) {
 				netdev_err(dst->master_netdev,
 					   "multiple cpu ports?!\n");
 				return -EINVAL;
 			}
-			dst->cpu_switch = ds;
-			dst->cpu_port = i;
+			dst->cpu_dp = &ds->ports[i];
 			ds->cpu_port_mask |= 1 << i;
 		} else if (!strcmp(name, "dsa")) {
 			ds->dsa_port_mask |= 1 << i;
@@ -144,7 +143,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 	 * tagging protocol to the preferred tagging format of this
 	 * switch.
 	 */
-	if (dst->cpu_switch == ds) {
+	if (dst->cpu_dp->ds == ds) {
 		enum dsa_tag_protocol tag_protocol;
 
 		tag_protocol = ops->get_tag_protocol(ds);
@@ -206,7 +205,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 		netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n",
 			   index);
 
-	ret = dsa_cpu_port_ethtool_setup(ds);
+	ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp);
 	if (ret)
 		return ret;
 
@@ -577,7 +576,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
 
 	dst->pd = pd;
 	dst->master_netdev = dev;
-	dst->cpu_port = -1;
 
 	for (i = 0; i < pd->nr_chips; i++) {
 		struct dsa_switch *ds;
@@ -606,7 +604,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
 	 * sent to the tag format's receive function.
 	 */
 	wmb();
-	dev->dsa_ptr = (void *)dst;
+	dev->dsa_ptr = dst;
 
 	return 0;
 }
@@ -688,7 +686,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
 			dsa_switch_destroy(ds);
 	}
 
-	dsa_cpu_port_ethtool_restore(dst->cpu_switch);
+	dsa_cpu_port_ethtool_restore(dst->cpu_dp);
 
 	dev_put(dst->master_netdev);
 }
diff --git a/net/dsa/port.c b/net/dsa/port.c
new file mode 100644
index 000000000000..efc3bce3a89d
--- /dev/null
+++ b/net/dsa/port.c
@@ -0,0 +1,259 @@
+/*
+ * Handling of a single switch port
+ *
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/if_bridge.h>
+#include <linux/notifier.h>
+
+#include "dsa_priv.h"
+
+static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
+{
+	struct raw_notifier_head *nh = &dp->ds->dst->nh;
+	int err;
+
+	err = raw_notifier_call_chain(nh, e, v);
+
+	return notifier_to_errno(err);
+}
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state,
+		       struct switchdev_trans *trans)
+{
+	struct dsa_switch *ds = dp->ds;
+	int port = dp->index;
+
+	if (switchdev_trans_ph_prepare(trans))
+		return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
+
+	if (ds->ops->port_stp_state_set)
+		ds->ops->port_stp_state_set(ds, port, state);
+
+	if (ds->ops->port_fast_age) {
+		/* Fast age FDB entries or flush appropriate forwarding database
+		 * for the given port, if we are moving it from Learning or
+		 * Forwarding state, to Disabled or Blocking or Listening state.
+		 */
+
+		if ((dp->stp_state == BR_STATE_LEARNING ||
+		     dp->stp_state == BR_STATE_FORWARDING) &&
+		    (state == BR_STATE_DISABLED ||
+		     state == BR_STATE_BLOCKING ||
+		     state == BR_STATE_LISTENING))
+			ds->ops->port_fast_age(ds, port);
+	}
+
+	dp->stp_state = state;
+
+	return 0;
+}
+
+void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+{
+	int err;
+
+	err = dsa_port_set_state(dp, state, NULL);
+	if (err)
+		pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
+}
+
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
+{
+	struct dsa_notifier_bridge_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.br = br,
+	};
+	int err;
+
+	/* Here the port is already bridged. Reflect the current configuration
+	 * so that drivers can program their chips accordingly.
+	 */
+	dp->bridge_dev = br;
+
+	err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
+
+	/* The bridging is rolled back on error */
+	if (err)
+		dp->bridge_dev = NULL;
+
+	return err;
+}
+
+void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
+{
+	struct dsa_notifier_bridge_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.br = br,
+	};
+	int err;
+
+	/* Here the port is already unbridged. Reflect the current configuration
+	 * so that drivers can program their chips accordingly.
+	 */
+	dp->bridge_dev = NULL;
+
+	err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
+	if (err)
+		pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
+
+	/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
+	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
+	 */
+	dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+}
+
+int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+			    struct switchdev_trans *trans)
+{
+	struct dsa_switch *ds = dp->ds;
+
+	/* bridge skips -EOPNOTSUPP, so skip the prepare phase */
+	if (switchdev_trans_ph_prepare(trans))
+		return 0;
+
+	if (ds->ops->port_vlan_filtering)
+		return ds->ops->port_vlan_filtering(ds, dp->index,
+						    vlan_filtering);
+
+	return 0;
+}
+
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
+			 struct switchdev_trans *trans)
+{
+	unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock);
+	unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
+	struct dsa_notifier_ageing_time_info info = {
+		.ageing_time = ageing_time,
+		.trans = trans,
+	};
+
+	if (switchdev_trans_ph_prepare(trans))
+		return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
+
+	dp->ageing_time = ageing_time;
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
+}
+
+int dsa_port_fdb_add(struct dsa_port *dp,
+		     const struct switchdev_obj_port_fdb *fdb,
+		     struct switchdev_trans *trans)
+{
+	struct dsa_notifier_fdb_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.trans = trans,
+		.fdb = fdb,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
+}
+
+int dsa_port_fdb_del(struct dsa_port *dp,
+		     const struct switchdev_obj_port_fdb *fdb)
+{
+	struct dsa_notifier_fdb_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.fdb = fdb,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
+}
+
+int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
+		      switchdev_obj_dump_cb_t *cb)
+{
+	struct dsa_switch *ds = dp->ds;
+
+	if (ds->ops->port_fdb_dump)
+		return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
+
+	return -EOPNOTSUPP;
+}
+
+int dsa_port_mdb_add(struct dsa_port *dp,
+		     const struct switchdev_obj_port_mdb *mdb,
+		     struct switchdev_trans *trans)
+{
+	struct dsa_notifier_mdb_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.trans = trans,
+		.mdb = mdb,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
+}
+
+int dsa_port_mdb_del(struct dsa_port *dp,
+		     const struct switchdev_obj_port_mdb *mdb)
+{
+	struct dsa_notifier_mdb_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.mdb = mdb,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
+}
+
+int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
+		      switchdev_obj_dump_cb_t *cb)
+{
+	struct dsa_switch *ds = dp->ds;
+
+	if (ds->ops->port_mdb_dump)
+		return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
+
+	return -EOPNOTSUPP;
+}
+
+int dsa_port_vlan_add(struct dsa_port *dp,
+		      const struct switchdev_obj_port_vlan *vlan,
+		      struct switchdev_trans *trans)
+{
+	struct dsa_notifier_vlan_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.trans = trans,
+		.vlan = vlan,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+}
+
+int dsa_port_vlan_del(struct dsa_port *dp,
+		      const struct switchdev_obj_port_vlan *vlan)
+{
+	struct dsa_notifier_vlan_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.vlan = vlan,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+}
+
+int dsa_port_vlan_dump(struct dsa_port *dp,
+		       struct switchdev_obj_port_vlan *vlan,
+		       switchdev_obj_dump_cb_t *cb)
+{
+	struct dsa_switch *ds = dp->ds;
+
+	if (ds->ops->port_vlan_dump)
+		return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
+
+	return -EOPNOTSUPP;
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7693182df81e..1cfdb31a2f44 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -17,28 +17,16 @@
 #include <linux/of_mdio.h>
 #include <linux/mdio.h>
 #include <linux/list.h>
-#include <net/dsa.h>
 #include <net/rtnetlink.h>
-#include <net/switchdev.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
 #include <linux/if_bridge.h>
 #include <linux/netpoll.h>
+
 #include "dsa_priv.h"
 
 static bool dsa_slave_dev_check(struct net_device *dev);
 
-static int dsa_slave_notify(struct net_device *dev, unsigned long e, void *v)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct raw_notifier_head *nh = &p->dp->ds->dst->nh;
-	int err;
-
-	err = raw_notifier_call_chain(nh, e, v);
-
-	return notifier_to_errno(err);
-}
-
 /* slave mii_bus handling ***************************************************/
 static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
 {
@@ -81,45 +69,13 @@ static int dsa_slave_get_iflink(const struct net_device *dev)
 	return p->dp->ds->dst->master_netdev->ifindex;
 }
 
-static inline bool dsa_port_is_bridged(struct dsa_port *dp)
-{
-	return !!dp->bridge_dev;
-}
-
-static void dsa_slave_set_state(struct net_device *dev, u8 state)
+static int dsa_slave_open(struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_port *dp = p->dp;
 	struct dsa_switch *ds = dp->ds;
-	int port = dp->index;
-
-	if (ds->ops->port_stp_state_set)
-		ds->ops->port_stp_state_set(ds, port, state);
-
-	if (ds->ops->port_fast_age) {
-		/* Fast age FDB entries or flush appropriate forwarding database
-		 * for the given port, if we are moving it from Learning or
-		 * Forwarding state, to Disabled or Blocking or Listening state.
-		 */
-
-		if ((dp->stp_state == BR_STATE_LEARNING ||
-		     dp->stp_state == BR_STATE_FORWARDING) &&
-		    (state == BR_STATE_DISABLED ||
-		     state == BR_STATE_BLOCKING ||
-		     state == BR_STATE_LISTENING))
-			ds->ops->port_fast_age(ds, port);
-	}
-
-	dp->stp_state = state;
-}
-
-static int dsa_slave_open(struct net_device *dev)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct net_device *master = p->dp->ds->dst->master_netdev;
-	struct dsa_switch *ds = p->dp->ds;
-	u8 stp_state = dsa_port_is_bridged(p->dp) ?
-			BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+	struct net_device *master = ds->dst->master_netdev;
+	u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
 	int err;
 
 	if (!(master->flags & IFF_UP))
@@ -148,7 +104,7 @@ static int dsa_slave_open(struct net_device *dev)
 			goto clear_promisc;
 	}
 
-	dsa_slave_set_state(dev, stp_state);
+	dsa_port_set_state_now(p->dp, stp_state);
 
 	if (p->phy)
 		phy_start(p->phy);
@@ -190,7 +146,7 @@ static int dsa_slave_close(struct net_device *dev)
 	if (ds->ops->port_disable)
 		ds->ops->port_disable(ds, p->dp->index, p->phy);
 
-	dsa_slave_set_state(dev, BR_STATE_DISABLED);
+	dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
 
 	return 0;
 }
@@ -243,140 +199,6 @@ out:
 	return 0;
 }
 
-static int dsa_slave_port_vlan_add(struct net_device *dev,
-				   const struct switchdev_obj_port_vlan *vlan,
-				   struct switchdev_trans *trans)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_port *dp = p->dp;
-	struct dsa_switch *ds = dp->ds;
-
-	if (switchdev_trans_ph_prepare(trans)) {
-		if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
-			return -EOPNOTSUPP;
-
-		return ds->ops->port_vlan_prepare(ds, dp->index, vlan, trans);
-	}
-
-	ds->ops->port_vlan_add(ds, dp->index, vlan, trans);
-
-	return 0;
-}
-
-static int dsa_slave_port_vlan_del(struct net_device *dev,
-				   const struct switchdev_obj_port_vlan *vlan)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	if (!ds->ops->port_vlan_del)
-		return -EOPNOTSUPP;
-
-	return ds->ops->port_vlan_del(ds, p->dp->index, vlan);
-}
-
-static int dsa_slave_port_vlan_dump(struct net_device *dev,
-				    struct switchdev_obj_port_vlan *vlan,
-				    switchdev_obj_dump_cb_t *cb)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	if (ds->ops->port_vlan_dump)
-		return ds->ops->port_vlan_dump(ds, p->dp->index, vlan, cb);
-
-	return -EOPNOTSUPP;
-}
-
-static int dsa_slave_port_fdb_add(struct net_device *dev,
-				  const struct switchdev_obj_port_fdb *fdb,
-				  struct switchdev_trans *trans)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	if (switchdev_trans_ph_prepare(trans)) {
-		if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
-			return -EOPNOTSUPP;
-
-		return ds->ops->port_fdb_prepare(ds, p->dp->index, fdb, trans);
-	}
-
-	ds->ops->port_fdb_add(ds, p->dp->index, fdb, trans);
-
-	return 0;
-}
-
-static int dsa_slave_port_fdb_del(struct net_device *dev,
-				  const struct switchdev_obj_port_fdb *fdb)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-	int ret = -EOPNOTSUPP;
-
-	if (ds->ops->port_fdb_del)
-		ret = ds->ops->port_fdb_del(ds, p->dp->index, fdb);
-
-	return ret;
-}
-
-static int dsa_slave_port_fdb_dump(struct net_device *dev,
-				   struct switchdev_obj_port_fdb *fdb,
-				   switchdev_obj_dump_cb_t *cb)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	if (ds->ops->port_fdb_dump)
-		return ds->ops->port_fdb_dump(ds, p->dp->index, fdb, cb);
-
-	return -EOPNOTSUPP;
-}
-
-static int dsa_slave_port_mdb_add(struct net_device *dev,
-				  const struct switchdev_obj_port_mdb *mdb,
-				  struct switchdev_trans *trans)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	if (switchdev_trans_ph_prepare(trans)) {
-		if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
-			return -EOPNOTSUPP;
-
-		return ds->ops->port_mdb_prepare(ds, p->dp->index, mdb, trans);
-	}
-
-	ds->ops->port_mdb_add(ds, p->dp->index, mdb, trans);
-
-	return 0;
-}
-
-static int dsa_slave_port_mdb_del(struct net_device *dev,
-				  const struct switchdev_obj_port_mdb *mdb)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	if (ds->ops->port_mdb_del)
-		return ds->ops->port_mdb_del(ds, p->dp->index, mdb);
-
-	return -EOPNOTSUPP;
-}
-
-static int dsa_slave_port_mdb_dump(struct net_device *dev,
-				   struct switchdev_obj_port_mdb *mdb,
-				   switchdev_obj_dump_cb_t *cb)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	if (ds->ops->port_mdb_dump)
-		return ds->ops->port_mdb_dump(ds, p->dp->index, mdb, cb);
-
-	return -EOPNOTSUPP;
-}
-
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -387,96 +209,24 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return -EOPNOTSUPP;
 }
 
-static int dsa_slave_stp_state_set(struct net_device *dev,
-				   const struct switchdev_attr *attr,
-				   struct switchdev_trans *trans)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	if (switchdev_trans_ph_prepare(trans))
-		return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
-
-	dsa_slave_set_state(dev, attr->u.stp_state);
-
-	return 0;
-}
-
-static int dsa_slave_vlan_filtering(struct net_device *dev,
-				    const struct switchdev_attr *attr,
-				    struct switchdev_trans *trans)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	/* bridge skips -EOPNOTSUPP, so skip the prepare phase */
-	if (switchdev_trans_ph_prepare(trans))
-		return 0;
-
-	if (ds->ops->port_vlan_filtering)
-		return ds->ops->port_vlan_filtering(ds, p->dp->index,
-						    attr->u.vlan_filtering);
-
-	return 0;
-}
-
-static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
-					    unsigned int ageing_time)
-{
-	int i;
-
-	for (i = 0; i < ds->num_ports; ++i) {
-		struct dsa_port *dp = &ds->ports[i];
-
-		if (dp && dp->ageing_time && dp->ageing_time < ageing_time)
-			ageing_time = dp->ageing_time;
-	}
-
-	return ageing_time;
-}
-
-static int dsa_slave_ageing_time(struct net_device *dev,
-				 const struct switchdev_attr *attr,
-				 struct switchdev_trans *trans)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-	unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time);
-	unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
-
-	if (switchdev_trans_ph_prepare(trans)) {
-		if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
-			return -ERANGE;
-		if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
-			return -ERANGE;
-		return 0;
-	}
-
-	/* Keep the fastest ageing time in case of multiple bridges */
-	p->dp->ageing_time = ageing_time;
-	ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
-
-	if (ds->ops->set_ageing_time)
-		return ds->ops->set_ageing_time(ds, ageing_time);
-
-	return 0;
-}
-
 static int dsa_slave_port_attr_set(struct net_device *dev,
 				   const struct switchdev_attr *attr,
 				   struct switchdev_trans *trans)
 {
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = p->dp;
 	int ret;
 
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
-		ret = dsa_slave_stp_state_set(dev, attr, trans);
+		ret = dsa_port_set_state(dp, attr->u.stp_state, trans);
 		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
-		ret = dsa_slave_vlan_filtering(dev, attr, trans);
+		ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
+					      trans);
 		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
-		ret = dsa_slave_ageing_time(dev, attr, trans);
+		ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans);
 		break;
 	default:
 		ret = -EOPNOTSUPP;
@@ -490,6 +240,8 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 				  const struct switchdev_obj *obj,
 				  struct switchdev_trans *trans)
 {
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = p->dp;
 	int err;
 
 	/* For the prepare phase, ensure the full set of changes is feasable in
@@ -499,18 +251,14 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		err = dsa_slave_port_fdb_add(dev,
-					     SWITCHDEV_OBJ_PORT_FDB(obj),
-					     trans);
+		err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans);
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
-					     trans);
+		err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dsa_slave_port_vlan_add(dev,
-					      SWITCHDEV_OBJ_PORT_VLAN(obj),
-					      trans);
+		err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
+					trans);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -523,19 +271,19 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 static int dsa_slave_port_obj_del(struct net_device *dev,
 				  const struct switchdev_obj *obj)
 {
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = p->dp;
 	int err;
 
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		err = dsa_slave_port_fdb_del(dev,
-					     SWITCHDEV_OBJ_PORT_FDB(obj));
+		err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
+		err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dsa_slave_port_vlan_del(dev,
-					      SWITCHDEV_OBJ_PORT_VLAN(obj));
+		err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -549,22 +297,19 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
 				   struct switchdev_obj *obj,
 				   switchdev_obj_dump_cb_t *cb)
 {
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = p->dp;
 	int err;
 
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		err = dsa_slave_port_fdb_dump(dev,
-					      SWITCHDEV_OBJ_PORT_FDB(obj),
-					      cb);
+		err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
-					      cb);
+		err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dsa_slave_port_vlan_dump(dev,
-					       SWITCHDEV_OBJ_PORT_VLAN(obj),
-					       cb);
+		err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -574,57 +319,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
 	return err;
 }
 
-static int dsa_slave_bridge_port_join(struct net_device *dev,
-				      struct net_device *br)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_notifier_bridge_info info = {
-		.sw_index = p->dp->ds->index,
-		.port = p->dp->index,
-		.br = br,
-	};
-	int err;
-
-	/* Here the port is already bridged. Reflect the current configuration
-	 * so that drivers can program their chips accordingly.
-	 */
-	p->dp->bridge_dev = br;
-
-	err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_JOIN, &info);
-
-	/* The bridging is rolled back on error */
-	if (err)
-		p->dp->bridge_dev = NULL;
-
-	return err;
-}
-
-static void dsa_slave_bridge_port_leave(struct net_device *dev,
-					struct net_device *br)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_notifier_bridge_info info = {
-		.sw_index = p->dp->ds->index,
-		.port = p->dp->index,
-		.br = br,
-	};
-	int err;
-
-	/* Here the port is already unbridged. Reflect the current configuration
-	 * so that drivers can program their chips accordingly.
-	 */
-	p->dp->bridge_dev = NULL;
-
-	err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
-	if (err)
-		netdev_err(dev, "failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
-
-	/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
-	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
-	 */
-	dsa_slave_set_state(dev, BR_STATE_FORWARDING);
-}
-
 static int dsa_slave_port_attr_get(struct net_device *dev,
 				   struct switchdev_attr *attr)
 {
@@ -663,10 +357,14 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
 
-	/* Transmit function may have to reallocate the original SKB */
+	/* Transmit function may have to reallocate the original SKB,
+	 * in which case it must have freed it. Only free it here on error.
+	 */
 	nskb = p->xmit(skb, dev);
-	if (!nskb)
+	if (!nskb) {
+		kfree_skb(skb);
 		return NETDEV_TX_OK;
+	}
 
 	/* SKB for netpoll still need to be mangled with the protocol-specific
 	 * tag to be successfully transmitted
@@ -821,8 +519,8 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
 					   uint64_t *data)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_switch *ds = dst->cpu_switch;
-	s8 cpu_port = dst->cpu_port;
+	struct dsa_switch *ds = dst->cpu_dp->ds;
+	s8 cpu_port = dst->cpu_dp->index;
 	int count = 0;
 
 	if (dst->master_ethtool_ops.get_sset_count) {
@@ -838,7 +536,7 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
 static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_switch *ds = dst->cpu_switch;
+	struct dsa_switch *ds = dst->cpu_dp->ds;
 	int count = 0;
 
 	if (dst->master_ethtool_ops.get_sset_count)
@@ -854,8 +552,8 @@ static void dsa_cpu_port_get_strings(struct net_device *dev,
 				     uint32_t stringset, uint8_t *data)
 {
 	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_switch *ds = dst->cpu_switch;
-	s8 cpu_port = dst->cpu_port;
+	struct dsa_switch *ds = dst->cpu_dp->ds;
+	s8 cpu_port = dst->cpu_dp->index;
 	int len = ETH_GSTRING_LEN;
 	int mcount = 0, count;
 	unsigned int i;
@@ -1528,14 +1226,16 @@ static bool dsa_slave_dev_check(struct net_device *dev)
 static int dsa_slave_changeupper(struct net_device *dev,
 				 struct netdev_notifier_changeupper_info *info)
 {
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = p->dp;
 	int err = NOTIFY_DONE;
 
 	if (netif_is_bridge_master(info->upper_dev)) {
 		if (info->linking) {
-			err = dsa_slave_bridge_port_join(dev, info->upper_dev);
+			err = dsa_port_bridge_join(dp, info->upper_dev);
 			err = notifier_from_errno(err);
 		} else {
-			dsa_slave_bridge_port_leave(dev, info->upper_dev);
+			dsa_port_bridge_leave(dp, info->upper_dev);
 			err = NOTIFY_OK;
 		}
 	}
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index ca6e26e514f0..d8e5c311ee7c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -12,7 +12,47 @@
 
 #include <linux/netdevice.h>
 #include <linux/notifier.h>
-#include <net/dsa.h>
+#include <net/switchdev.h>
+
+#include "dsa_priv.h"
+
+static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
+						   unsigned int ageing_time)
+{
+	int i;
+
+	for (i = 0; i < ds->num_ports; ++i) {
+		struct dsa_port *dp = &ds->ports[i];
+
+		if (dp->ageing_time && dp->ageing_time < ageing_time)
+			ageing_time = dp->ageing_time;
+	}
+
+	return ageing_time;
+}
+
+static int dsa_switch_ageing_time(struct dsa_switch *ds,
+				  struct dsa_notifier_ageing_time_info *info)
+{
+	unsigned int ageing_time = info->ageing_time;
+	struct switchdev_trans *trans = info->trans;
+
+	if (switchdev_trans_ph_prepare(trans)) {
+		if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
+			return -ERANGE;
+		if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
+			return -ERANGE;
+		return 0;
+	}
+
+	/* Program the fastest ageing time in case of multiple bridges */
+	ageing_time = dsa_switch_fastest_ageing_time(ds, ageing_time);
+
+	if (ds->ops->set_ageing_time)
+		return ds->ops->set_ageing_time(ds, ageing_time);
+
+	return 0;
+}
 
 static int dsa_switch_bridge_join(struct dsa_switch *ds,
 				  struct dsa_notifier_bridge_info *info)
@@ -40,6 +80,117 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
 	return 0;
 }
 
+static int dsa_switch_fdb_add(struct dsa_switch *ds,
+			      struct dsa_notifier_fdb_info *info)
+{
+	const struct switchdev_obj_port_fdb *fdb = info->fdb;
+	struct switchdev_trans *trans = info->trans;
+
+	/* Do not care yet about other switch chips of the fabric */
+	if (ds->index != info->sw_index)
+		return 0;
+
+	if (switchdev_trans_ph_prepare(trans)) {
+		if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
+			return -EOPNOTSUPP;
+
+		return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans);
+	}
+
+	ds->ops->port_fdb_add(ds, info->port, fdb, trans);
+
+	return 0;
+}
+
+static int dsa_switch_fdb_del(struct dsa_switch *ds,
+			      struct dsa_notifier_fdb_info *info)
+{
+	const struct switchdev_obj_port_fdb *fdb = info->fdb;
+
+	/* Do not care yet about other switch chips of the fabric */
+	if (ds->index != info->sw_index)
+		return 0;
+
+	if (!ds->ops->port_fdb_del)
+		return -EOPNOTSUPP;
+
+	return ds->ops->port_fdb_del(ds, info->port, fdb);
+}
+
+static int dsa_switch_mdb_add(struct dsa_switch *ds,
+			      struct dsa_notifier_mdb_info *info)
+{
+	const struct switchdev_obj_port_mdb *mdb = info->mdb;
+	struct switchdev_trans *trans = info->trans;
+
+	/* Do not care yet about other switch chips of the fabric */
+	if (ds->index != info->sw_index)
+		return 0;
+
+	if (switchdev_trans_ph_prepare(trans)) {
+		if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
+			return -EOPNOTSUPP;
+
+		return ds->ops->port_mdb_prepare(ds, info->port, mdb, trans);
+	}
+
+	ds->ops->port_mdb_add(ds, info->port, mdb, trans);
+
+	return 0;
+}
+
+static int dsa_switch_mdb_del(struct dsa_switch *ds,
+			      struct dsa_notifier_mdb_info *info)
+{
+	const struct switchdev_obj_port_mdb *mdb = info->mdb;
+
+	/* Do not care yet about other switch chips of the fabric */
+	if (ds->index != info->sw_index)
+		return 0;
+
+	if (!ds->ops->port_mdb_del)
+		return -EOPNOTSUPP;
+
+	return ds->ops->port_mdb_del(ds, info->port, mdb);
+}
+
+static int dsa_switch_vlan_add(struct dsa_switch *ds,
+			       struct dsa_notifier_vlan_info *info)
+{
+	const struct switchdev_obj_port_vlan *vlan = info->vlan;
+	struct switchdev_trans *trans = info->trans;
+
+	/* Do not care yet about other switch chips of the fabric */
+	if (ds->index != info->sw_index)
+		return 0;
+
+	if (switchdev_trans_ph_prepare(trans)) {
+		if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
+			return -EOPNOTSUPP;
+
+		return ds->ops->port_vlan_prepare(ds, info->port, vlan, trans);
+	}
+
+	ds->ops->port_vlan_add(ds, info->port, vlan, trans);
+
+	return 0;
+}
+
+static int dsa_switch_vlan_del(struct dsa_switch *ds,
+			       struct dsa_notifier_vlan_info *info)
+{
+	const struct switchdev_obj_port_vlan *vlan = info->vlan;
+
+	/* Do not care yet about other switch chips of the fabric */
+	if (ds->index != info->sw_index)
+		return 0;
+
+	if (!ds->ops->port_vlan_del)
+		return -EOPNOTSUPP;
+
+	return ds->ops->port_vlan_del(ds, info->port, vlan);
+}
+
 static int dsa_switch_event(struct notifier_block *nb,
 			    unsigned long event, void *info)
 {
@@ -47,12 +198,33 @@ static int dsa_switch_event(struct notifier_block *nb,
 	int err;
 
 	switch (event) {
+	case DSA_NOTIFIER_AGEING_TIME:
+		err = dsa_switch_ageing_time(ds, info);
+		break;
 	case DSA_NOTIFIER_BRIDGE_JOIN:
 		err = dsa_switch_bridge_join(ds, info);
 		break;
 	case DSA_NOTIFIER_BRIDGE_LEAVE:
 		err = dsa_switch_bridge_leave(ds, info);
 		break;
+	case DSA_NOTIFIER_FDB_ADD:
+		err = dsa_switch_fdb_add(ds, info);
+		break;
+	case DSA_NOTIFIER_FDB_DEL:
+		err = dsa_switch_fdb_del(ds, info);
+		break;
+	case DSA_NOTIFIER_MDB_ADD:
+		err = dsa_switch_mdb_add(ds, info);
+		break;
+	case DSA_NOTIFIER_MDB_DEL:
+		err = dsa_switch_mdb_del(ds, info);
+		break;
+	case DSA_NOTIFIER_VLAN_ADD:
+		err = dsa_switch_vlan_add(ds, info);
+		break;
+	case DSA_NOTIFIER_VLAN_DEL:
+		err = dsa_switch_vlan_del(ds, info);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 2a9b52c5af86..c03860907f28 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -12,7 +12,7 @@
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 /* This tag length is 4 bytes, older ones were 6 bytes, we do not
@@ -65,7 +65,7 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
 	u8 *brcm_tag;
 
 	if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
-		goto out_free;
+		return NULL;
 
 	skb_push(skb, BRCM_TAG_LEN);
 
@@ -86,10 +86,6 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
 	brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK;
 
 	return skb;
-
-out_free:
-	kfree_skb(skb);
-	return NULL;
 }
 
 static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -101,30 +97,30 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	int source_port;
 	u8 *brcm_tag;
 
-	ds = dst->cpu_switch;
+	ds = dst->cpu_dp->ds;
 
 	if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
-		goto out_drop;
+		return NULL;
 
 	/* skb->data points to the EtherType, the tag is right before it */
 	brcm_tag = skb->data - 2;
 
 	/* The opcode should never be different than 0b000 */
 	if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK))
-		goto out_drop;
+		return NULL;
 
 	/* We should never see a reserved reason code without knowing how to
 	 * handle it
 	 */
 	if (unlikely(brcm_tag[2] & BRCM_EG_RC_RSVD))
-		goto out_drop;
+		return NULL;
 
 	/* Locate which port this is coming from */
 	source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
 
 	/* Validate port against switch setup, either the port is totally */
 	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
-		goto out_drop;
+		return NULL;
 
 	/* Remove Broadcom tag and update checksum */
 	skb_pull_rcsum(skb, BRCM_TAG_LEN);
@@ -137,9 +133,6 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	skb->dev = ds->ports[source_port].netdev;
 
 	return skb;
-
-out_drop:
-	return NULL;
 }
 
 const struct dsa_device_ops brcm_netdev_ops = {
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 1c6633f0de01..12867a4b458f 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -11,7 +11,7 @@
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 #define DSA_HLEN	4
@@ -28,7 +28,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		if (skb_cow_head(skb, 0) < 0)
-			goto out_free;
+			return NULL;
 
 		/*
 		 * Construct tagged FROM_CPU DSA tag from 802.1q tag.
@@ -46,7 +46,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	} else {
 		if (skb_cow_head(skb, DSA_HLEN) < 0)
-			goto out_free;
+			return NULL;
 		skb_push(skb, DSA_HLEN);
 
 		memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN);
@@ -62,10 +62,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	return skb;
-
-out_free:
-	kfree_skb(skb);
-	return NULL;
 }
 
 static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -79,7 +75,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	int source_port;
 
 	if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
-		goto out_drop;
+		return NULL;
 
 	/*
 	 * The ethertype field is part of the DSA header.
@@ -90,7 +86,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	 * Check that frame type is either TO_CPU or FORWARD.
 	 */
 	if ((dsa_header[0] & 0xc0) != 0x00 && (dsa_header[0] & 0xc0) != 0xc0)
-		goto out_drop;
+		return NULL;
 
 	/*
 	 * Determine source device and port.
@@ -103,14 +99,14 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	 * port is a registered DSA port.
 	 */
 	if (source_device >= DSA_MAX_SWITCHES)
-		goto out_drop;
+		return NULL;
 
 	ds = dst->ds[source_device];
 	if (!ds)
-		goto out_drop;
+		return NULL;
 
 	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
-		goto out_drop;
+		return NULL;
 
 	/*
 	 * Convert the DSA header to an 802.1q header if the 'tagged'
@@ -161,9 +157,6 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	skb->dev = ds->ports[source_port].netdev;
 
 	return skb;
-
-out_drop:
-	return NULL;
 }
 
 const struct dsa_device_ops dsa_netdev_ops = {
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index d9c668aa5e54..67a9d26f9075 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -11,7 +11,7 @@
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 #define DSA_HLEN	4
@@ -30,7 +30,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		if (skb_cow_head(skb, DSA_HLEN) < 0)
-			goto out_free;
+			return NULL;
 		skb_push(skb, DSA_HLEN);
 
 		memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN);
@@ -55,7 +55,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	} else {
 		if (skb_cow_head(skb, EDSA_HLEN) < 0)
-			goto out_free;
+			return NULL;
 		skb_push(skb, EDSA_HLEN);
 
 		memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN);
@@ -75,10 +75,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	return skb;
-
-out_free:
-	kfree_skb(skb);
-	return NULL;
 }
 
 static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -92,7 +88,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	int source_port;
 
 	if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
-		goto out_drop;
+		return NULL;
 
 	/*
 	 * Skip the two null bytes after the ethertype.
@@ -103,7 +99,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	 * Check that frame type is either TO_CPU or FORWARD.
 	 */
 	if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0)
-		goto out_drop;
+		return NULL;
 
 	/*
 	 * Determine source device and port.
@@ -116,14 +112,14 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	 * port is a registered DSA port.
 	 */
 	if (source_device >= DSA_MAX_SWITCHES)
-		goto out_drop;
+		return NULL;
 
 	ds = dst->ds[source_device];
 	if (!ds)
-		goto out_drop;
+		return NULL;
 
 	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
-		goto out_drop;
+		return NULL;
 
 	/*
 	 * If the 'tagged' bit is set, convert the DSA tag to a 802.1q
@@ -180,9 +176,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	skb->dev = ds->ports[source_port].netdev;
 
 	return skb;
-
-out_drop:
-	return NULL;
 }
 
 const struct dsa_device_ops edsa_netdev_ops = {
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
new file mode 100644
index 000000000000..b94a334a1d02
--- /dev/null
+++ b/net/dsa/tag_ksz.c
@@ -0,0 +1,100 @@
+/*
+ * net/dsa/tag_ksz.c - Microchip KSZ Switch tag format handling
+ * Copyright (c) 2017 Microchip Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+/* For Ingress (Host -> KSZ), 2 bytes are added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : Prioritization (not used now)
+ * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ -> Host), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ *	  (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#define	KSZ_INGRESS_TAG_LEN	2
+#define	KSZ_EGRESS_TAG_LEN	1
+
+static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct sk_buff *nskb;
+	int padlen;
+	u8 *tag;
+
+	padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
+
+	if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
+		nskb = skb;
+	} else {
+		nskb = alloc_skb(NET_IP_ALIGN + skb->len +
+				 padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC);
+		if (!nskb)
+			return NULL;
+		skb_reserve(nskb, NET_IP_ALIGN);
+
+		skb_reset_mac_header(nskb);
+		skb_set_network_header(nskb,
+				       skb_network_header(skb) - skb->head);
+		skb_set_transport_header(nskb,
+					 skb_transport_header(skb) - skb->head);
+		skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
+		kfree_skb(skb);
+	}
+
+	/* skb is freed when it fails */
+	if (skb_put_padto(nskb, nskb->len + padlen))
+		return NULL;
+
+	tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
+	tag[0] = 0;
+	tag[1] = 1 << p->dp->index; /* destination port */
+
+	return nskb;
+}
+
+static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
+			       struct packet_type *pt,
+			       struct net_device *orig_dev)
+{
+	struct dsa_switch_tree *dst = dev->dsa_ptr;
+	struct dsa_switch *ds;
+	u8 *tag;
+	int source_port;
+
+	ds = dst->cpu_dp->ds;
+
+	tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+
+	source_port = tag[0] & 7;
+	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+		return NULL;
+
+	pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
+
+	skb->dev = ds->ports[source_port].netdev;
+
+	return skb;
+}
+
+const struct dsa_device_ops ksz_netdev_ops = {
+	.xmit	= ksz_xmit,
+	.rcv	= ksz_rcv,
+};
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 70130ed5c21a..247774d149f9 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -14,7 +14,7 @@
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 /* To define the outgoing port and to discover the incoming port a regular
@@ -52,7 +52,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) {
 		dev_dbg(&dev->dev,
 			"Cannot make room for the special tag. Dropping packet\n");
-		goto out_free;
+		return NULL;
 	}
 
 	/* provide 'LAN9303_TAG_LEN' bytes additional space */
@@ -66,9 +66,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
 	lan9303_tag[1] = htons(p->dp->index | BIT(4));
 
 	return skb;
-out_free:
-	kfree_skb(skb);
-	return NULL;
 }
 
 static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 837cdddb53f0..2f32b7ea3365 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -13,7 +13,7 @@
  */
 
 #include <linux/etherdevice.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 #define MTK_HDR_LEN		4
@@ -27,7 +27,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	u8 *mtk_tag;
 
 	if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
-		goto out_free;
+		return NULL;
 
 	skb_push(skb, MTK_HDR_LEN);
 
@@ -41,10 +41,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	mtk_tag[3] = 0;
 
 	return skb;
-
-out_free:
-	kfree_skb(skb);
-	return NULL;
 }
 
 static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -57,7 +53,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	__be16 *phdr, hdr;
 
 	if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
-		goto out_drop;
+		return NULL;
 
 	/* The MTK header is added by the switch between src addr
 	 * and ethertype at this point, skb->data points to 2 bytes
@@ -79,19 +75,16 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	 */
 	ds = dst->ds[0];
 	if (!ds)
-		goto out_drop;
+		return NULL;
 
 	/* Get source port information */
 	port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
 	if (!ds->ports[port].netdev)
-		goto out_drop;
+		return NULL;
 
 	skb->dev = ds->ports[port].netdev;
 
 	return skb;
-
-out_drop:
-	return NULL;
 }
 
 const struct dsa_device_ops mtk_netdev_ops = {
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 3ba3f59f7a34..4f43cf0b4eff 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -12,7 +12,7 @@
  */
 
 #include <linux/etherdevice.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 #define QCA_HDR_LEN	2
@@ -45,7 +45,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 	dev->stats.tx_bytes += skb->len;
 
 	if (skb_cow_head(skb, 0) < 0)
-		goto out_free;
+		return NULL;
 
 	skb_push(skb, QCA_HDR_LEN);
 
@@ -60,10 +60,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 	*phdr = htons(hdr);
 
 	return skb;
-
-out_free:
-	kfree_skb(skb);
-	return NULL;
 }
 
 static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -77,7 +73,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	__be16 *phdr, hdr;
 
 	if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
-		goto out_drop;
+		return NULL;
 
 	/* The QCA header is added by the switch between src addr and Ethertype
 	 * At this point, skb->data points to ethertype so header should be
@@ -89,7 +85,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	/* Make sure the version is correct */
 	ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S;
 	if (unlikely(ver != QCA_HDR_VERSION))
-		goto out_drop;
+		return NULL;
 
 	/* Remove QCA tag and recalculate checksum */
 	skb_pull_rcsum(skb, QCA_HDR_LEN);
@@ -99,22 +95,19 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	/* This protocol doesn't support cascading multiple switches so it's
 	 * safe to assume the switch is first in the tree
 	 */
-	ds = dst->cpu_switch;
+	ds = dst->cpu_dp->ds;
 	if (!ds)
-		goto out_drop;
+		return NULL;
 
 	/* Get source port information */
 	port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
 	if (!ds->ports[port].netdev)
-		goto out_drop;
+		return NULL;
 
 	/* Update skb & forward the frame accordingly */
 	skb->dev = ds->ports[port].netdev;
 
 	return skb;
-
-out_drop:
-	return NULL;
 }
 
 const struct dsa_device_ops qca_netdev_ops = {
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index aafc2fc74c30..b4f6db094409 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -11,7 +11,7 @@
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <net/dsa.h>
+
 #include "dsa_priv.h"
 
 static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -32,10 +32,8 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
 		padlen = 60 - skb->len;
 
 	nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC);
-	if (nskb == NULL) {
-		kfree_skb(skb);
+	if (!nskb)
 		return NULL;
-	}
 	skb_reserve(nskb, NET_IP_ALIGN);
 
 	skb_reset_mac_header(nskb);
@@ -67,28 +65,25 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
 	u8 *trailer;
 	int source_port;
 
-	ds = dst->cpu_switch;
+	ds = dst->cpu_dp->ds;
 
 	if (skb_linearize(skb))
-		goto out_drop;
+		return NULL;
 
 	trailer = skb_tail_pointer(skb) - 4;
 	if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 ||
 	    (trailer[2] & 0xef) != 0x00 || trailer[3] != 0x00)
-		goto out_drop;
+		return NULL;
 
 	source_port = trailer[1] & 7;
 	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
-		goto out_drop;
+		return NULL;
 
 	pskb_trim_rcsum(skb, skb->len - 4);
 
 	skb->dev = ds->ports[source_port].netdev;
 
 	return skb;
-
-out_drop:
-	return NULL;
 }
 
 const struct dsa_device_ops trailer_netdev_ops = {
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index eedba7670b51..a60658c85a9a 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -301,15 +301,14 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 		goto out_skb;
 
 	skb->dev = dev;
-	skb->sk  = sk;
 	skb->protocol = htons(ETH_P_IEEE802154);
 
-	dev_put(dev);
-
 	err = dev_queue_xmit(skb);
 	if (err > 0)
 		err = net_xmit_errno(err);
 
+	dev_put(dev);
+
 	return err ?: size;
 
 out_skb:
@@ -690,15 +689,14 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 		goto out_skb;
 
 	skb->dev = dev;
-	skb->sk  = sk;
 	skb->protocol = htons(ETH_P_IEEE802154);
 
-	dev_put(dev);
-
 	err = dev_queue_xmit(skb);
 	if (err > 0)
 		err = net_xmit_errno(err);
 
+	dev_put(dev);
+
 	return err ?: size;
 
 out_skb:
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 22377c8ff14b..e8f862358518 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -220,7 +220,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
 	sg_init_table(sg, nfrags + sglists);
-	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out_free;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
 		/* Attach seqhi sg right after packet payload */
@@ -393,7 +395,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb_push(skb, ihl);
 
 	sg_init_table(sg, nfrags + sglists);
-	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out_free;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
 		/* Attach seqhi sg right after packet payload */
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index e9f3386a528b..a651c53260ec 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1113,13 +1113,17 @@ static int arp_invalidate(struct net_device *dev, __be32 ip)
 {
 	struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
 	int err = -ENXIO;
+	struct neigh_table *tbl = &arp_tbl;
 
 	if (neigh) {
 		if (neigh->nud_state & ~NUD_NOARP)
 			err = neigh_update(neigh, NULL, NUD_FAILED,
 					   NEIGH_UPDATE_F_OVERRIDE|
 					   NEIGH_UPDATE_F_ADMIN, 0);
+		write_lock_bh(&tbl->lock);
 		neigh_release(neigh);
+		neigh_remove_one(neigh, tbl);
+		write_unlock_bh(&tbl->lock);
 	}
 
 	return err;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 93322f895eab..d815d1755473 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -377,9 +377,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	esp->esph = esph;
 
 	sg_init_table(sg, esp->nfrags);
-	skb_to_sgvec(skb, sg,
-		     (unsigned char *)esph - skb->data,
-		     assoclen + ivlen + esp->clen + alen);
+	err = skb_to_sgvec(skb, sg,
+		           (unsigned char *)esph - skb->data,
+		           assoclen + ivlen + esp->clen + alen);
+	if (unlikely(err < 0))
+		goto error;
 
 	if (!esp->inplace) {
 		int allocsize;
@@ -403,9 +405,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 		spin_unlock_bh(&x->lock);
 
 		sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
-		skb_to_sgvec(skb, dsg,
-			     (unsigned char *)esph - skb->data,
-			     assoclen + ivlen + esp->clen + alen);
+		err = skb_to_sgvec(skb, dsg,
+			           (unsigned char *)esph - skb->data,
+			           assoclen + ivlen + esp->clen + alen);
+		if (unlikely(err < 0))
+			goto error;
 	}
 
 	if ((x->props.flags & XFRM_STATE_ESN))
@@ -690,7 +694,9 @@ skip_cow:
 	esp_input_set_header(skb, seqhi);
 
 	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	err = skb_to_sgvec(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 83e3ed258467..4e678fa892dd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -588,13 +588,15 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 			if (cmd == SIOCDELRT) {
 				tb = fib_get_table(net, cfg.fc_table);
 				if (tb)
-					err = fib_table_delete(net, tb, &cfg);
+					err = fib_table_delete(net, tb, &cfg,
+							       NULL);
 				else
 					err = -ESRCH;
 			} else {
 				tb = fib_new_table(net, cfg.fc_table);
 				if (tb)
-					err = fib_table_insert(net, tb, &cfg);
+					err = fib_table_insert(net, tb,
+							       &cfg, NULL);
 				else
 					err = -ENOBUFS;
 			}
@@ -626,14 +628,15 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
-			     struct nlmsghdr *nlh, struct fib_config *cfg)
+			     struct nlmsghdr *nlh, struct fib_config *cfg,
+			     struct netlink_ext_ack *extack)
 {
 	struct nlattr *attr;
 	int err, remaining;
 	struct rtmsg *rtm;
 
 	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
-			     NULL);
+			     extack);
 	if (err < 0)
 		goto errout;
 
@@ -654,6 +657,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 	cfg->fc_nlinfo.nl_net = net;
 
 	if (cfg->fc_type > RTN_MAX) {
+		NL_SET_ERR_MSG(extack, "Invalid route type");
 		err = -EINVAL;
 		goto errout;
 	}
@@ -681,7 +685,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 			break;
 		case RTA_MULTIPATH:
 			err = lwtunnel_valid_encap_type_attr(nla_data(attr),
-							     nla_len(attr));
+							     nla_len(attr),
+							     extack);
 			if (err < 0)
 				goto errout;
 			cfg->fc_mp = nla_data(attr);
@@ -698,7 +703,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 			break;
 		case RTA_ENCAP_TYPE:
 			cfg->fc_encap_type = nla_get_u16(attr);
-			err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
+			err = lwtunnel_valid_encap_type(cfg->fc_encap_type,
+							extack);
 			if (err < 0)
 				goto errout;
 			break;
@@ -718,17 +724,18 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct fib_table *tb;
 	int err;
 
-	err = rtm_to_fib_config(net, skb, nlh, &cfg);
+	err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
 	if (err < 0)
 		goto errout;
 
 	tb = fib_get_table(net, cfg.fc_table);
 	if (!tb) {
+		NL_SET_ERR_MSG(extack, "FIB table does not exist");
 		err = -ESRCH;
 		goto errout;
 	}
 
-	err = fib_table_delete(net, tb, &cfg);
+	err = fib_table_delete(net, tb, &cfg, extack);
 errout:
 	return err;
 }
@@ -741,7 +748,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct fib_table *tb;
 	int err;
 
-	err = rtm_to_fib_config(net, skb, nlh, &cfg);
+	err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
 	if (err < 0)
 		goto errout;
 
@@ -751,7 +758,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto errout;
 	}
 
-	err = fib_table_insert(net, tb, &cfg);
+	err = fib_table_insert(net, tb, &cfg, extack);
 errout:
 	return err;
 }
@@ -845,9 +852,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
 		cfg.fc_scope = RT_SCOPE_HOST;
 
 	if (cmd == RTM_NEWROUTE)
-		fib_table_insert(net, tb, &cfg);
+		fib_table_insert(net, tb, &cfg, NULL);
 	else
-		fib_table_delete(net, tb, &cfg);
+		fib_table_delete(net, tb, &cfg, NULL);
 }
 
 void fib_add_ifaddr(struct in_ifaddr *ifa)
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 9c02920725db..769ab87ebc4b 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -28,8 +28,10 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
 
 /* Exported by fib_semantics.c */
 void fib_release_info(struct fib_info *);
-struct fib_info *fib_create_info(struct fib_config *cfg);
-int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
+struct fib_info *fib_create_info(struct fib_config *cfg,
+				 struct netlink_ext_ack *extack);
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
+		 struct netlink_ext_ack *extack);
 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
 		  u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
 		  unsigned int);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ad9ad4aab5da..2157dc08c407 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -32,6 +32,7 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/netlink.h>
 
 #include <net/arp.h>
 #include <net/ip.h>
@@ -456,7 +457,8 @@ static int fib_detect_death(struct fib_info *fi, int order,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
-static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
+static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
+			      struct netlink_ext_ack *extack)
 {
 	int nhs = 0;
 
@@ -466,22 +468,35 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
 	}
 
 	/* leftover implies invalid nexthop configuration, discard it */
-	return remaining > 0 ? 0 : nhs;
+	if (remaining > 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid nexthop configuration - extra data after nexthops");
+		nhs = 0;
+	}
+
+	return nhs;
 }
 
 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
-		       int remaining, struct fib_config *cfg)
+		       int remaining, struct fib_config *cfg,
+		       struct netlink_ext_ack *extack)
 {
 	int ret;
 
 	change_nexthops(fi) {
 		int attrlen;
 
-		if (!rtnh_ok(rtnh, remaining))
+		if (!rtnh_ok(rtnh, remaining)) {
+			NL_SET_ERR_MSG(extack,
+				       "Invalid nexthop configuration - extra data after nexthop");
 			return -EINVAL;
+		}
 
-		if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+		if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
+			NL_SET_ERR_MSG(extack,
+				       "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
 			return -EINVAL;
+		}
 
 		nexthop_nh->nh_flags =
 			(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
@@ -507,13 +522,17 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 
 				nla_entype = nla_find(attrs, attrlen,
 						      RTA_ENCAP_TYPE);
-				if (!nla_entype)
+				if (!nla_entype) {
+					NL_SET_BAD_ATTR(extack, nla);
+					NL_SET_ERR_MSG(extack,
+						       "Encap type is missing");
 					goto err_inval;
+				}
 
 				ret = lwtunnel_build_state(nla_get_u16(
 							   nla_entype),
 							   nla,  AF_INET, cfg,
-							   &lwtstate);
+							   &lwtstate, extack);
 				if (ret)
 					goto errout;
 				nexthop_nh->nh_lwtstate =
@@ -595,7 +614,8 @@ static inline void fib_add_weight(struct fib_info *fi,
 static int fib_encap_match(u16 encap_type,
 			   struct nlattr *encap,
 			   const struct fib_nh *nh,
-			   const struct fib_config *cfg)
+			   const struct fib_config *cfg,
+			   struct netlink_ext_ack *extack)
 {
 	struct lwtunnel_state *lwtstate;
 	int ret, result = 0;
@@ -603,8 +623,8 @@ static int fib_encap_match(u16 encap_type,
 	if (encap_type == LWTUNNEL_ENCAP_NONE)
 		return 0;
 
-	ret = lwtunnel_build_state(encap_type, encap,
-				   AF_INET, cfg, &lwtstate);
+	ret = lwtunnel_build_state(encap_type, encap, AF_INET,
+				   cfg, &lwtstate, extack);
 	if (!ret) {
 		result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
 		lwtstate_free(lwtstate);
@@ -613,7 +633,8 @@ static int fib_encap_match(u16 encap_type,
 	return result;
 }
 
-int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
+		 struct netlink_ext_ack *extack)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	struct rtnexthop *rtnh;
@@ -625,9 +646,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 
 	if (cfg->fc_oif || cfg->fc_gw) {
 		if (cfg->fc_encap) {
-			if (fib_encap_match(cfg->fc_encap_type,
-					    cfg->fc_encap, fi->fib_nh, cfg))
-			    return 1;
+			if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
+					    fi->fib_nh, cfg, extack))
+				return 1;
 		}
 		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
 		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
@@ -716,7 +737,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
  *					|-> {local prefix} (terminal node)
  */
 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
-			struct fib_nh *nh)
+			struct fib_nh *nh, struct netlink_ext_ack *extack)
 {
 	int err = 0;
 	struct net *net;
@@ -729,16 +750,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 		if (nh->nh_flags & RTNH_F_ONLINK) {
 			unsigned int addr_type;
 
-			if (cfg->fc_scope >= RT_SCOPE_LINK)
+			if (cfg->fc_scope >= RT_SCOPE_LINK) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop has invalid scope");
 				return -EINVAL;
+			}
 			dev = __dev_get_by_index(net, nh->nh_oif);
 			if (!dev)
 				return -ENODEV;
-			if (!(dev->flags & IFF_UP))
+			if (!(dev->flags & IFF_UP)) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop device is not up");
 				return -ENETDOWN;
+			}
 			addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
-			if (addr_type != RTN_UNICAST)
+			if (addr_type != RTN_UNICAST) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop has invalid gateway");
 				return -EINVAL;
+			}
 			if (!netif_carrier_ok(dev))
 				nh->nh_flags |= RTNH_F_LINKDOWN;
 			nh->nh_dev = dev;
@@ -778,18 +808,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 			}
 
 			if (err) {
+				NL_SET_ERR_MSG(extack,
+					       "Nexthop has invalid gateway");
 				rcu_read_unlock();
 				return err;
 			}
 		}
 		err = -EINVAL;
-		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
+		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
+			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
 			goto out;
+		}
 		nh->nh_scope = res.scope;
 		nh->nh_oif = FIB_RES_OIF(res);
 		nh->nh_dev = dev = FIB_RES_DEV(res);
-		if (!dev)
+		if (!dev) {
+			NL_SET_ERR_MSG(extack,
+				       "No egress device for nexthop gateway");
 			goto out;
+		}
 		dev_hold(dev);
 		if (!netif_carrier_ok(dev))
 			nh->nh_flags |= RTNH_F_LINKDOWN;
@@ -797,17 +834,21 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 	} else {
 		struct in_device *in_dev;
 
-		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
+		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
+			NL_SET_ERR_MSG(extack,
+				       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
 			return -EINVAL;
-
+		}
 		rcu_read_lock();
 		err = -ENODEV;
 		in_dev = inetdev_by_index(net, nh->nh_oif);
 		if (!in_dev)
 			goto out;
 		err = -ENETDOWN;
-		if (!(in_dev->dev->flags & IFF_UP))
+		if (!(in_dev->dev->flags & IFF_UP)) {
+			NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
 			goto out;
+		}
 		nh->nh_dev = in_dev->dev;
 		dev_hold(nh->nh_dev);
 		nh->nh_scope = RT_SCOPE_HOST;
@@ -982,7 +1023,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 	return 0;
 }
 
-struct fib_info *fib_create_info(struct fib_config *cfg)
+struct fib_info *fib_create_info(struct fib_config *cfg,
+				 struct netlink_ext_ack *extack)
 {
 	int err;
 	struct fib_info *fi = NULL;
@@ -994,15 +1036,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		goto err_inval;
 
 	/* Fast check to catch the most weird cases */
-	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
+	if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
+		NL_SET_ERR_MSG(extack, "Invalid scope");
 		goto err_inval;
+	}
 
-	if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+	if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
 		goto err_inval;
+	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (cfg->fc_mp) {
-		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
+		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
 		if (nhs == 0)
 			goto err_inval;
 	}
@@ -1065,18 +1112,29 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 
 	if (cfg->fc_mp) {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
+		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
 		if (err != 0)
 			goto failure;
-		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
+		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop device index does not match RTA_OIF");
 			goto err_inval;
-		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
+		}
+		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop gateway does not match RTA_GATEWAY");
 			goto err_inval;
+		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
+		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop class id does not match RTA_FLOW");
 			goto err_inval;
+		}
 #endif
 #else
+		NL_SET_ERR_MSG(extack,
+			       "Multipath support not enabled in kernel");
 		goto err_inval;
 #endif
 	} else {
@@ -1085,11 +1143,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		if (cfg->fc_encap) {
 			struct lwtunnel_state *lwtstate;
 
-			if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE)
+			if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
+				NL_SET_ERR_MSG(extack,
+					       "LWT encap type not specified");
 				goto err_inval;
+			}
 			err = lwtunnel_build_state(cfg->fc_encap_type,
 						   cfg->fc_encap, AF_INET, cfg,
-						   &lwtstate);
+						   &lwtstate, extack);
 			if (err)
 				goto failure;
 
@@ -1109,8 +1170,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	}
 
 	if (fib_props[cfg->fc_type].error) {
-		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
+		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
+			NL_SET_ERR_MSG(extack,
+				       "Gateway, device and multipath can not be specified for this route type");
 			goto err_inval;
+		}
 		goto link_it;
 	} else {
 		switch (cfg->fc_type) {
@@ -1121,19 +1185,30 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		case RTN_MULTICAST:
 			break;
 		default:
+			NL_SET_ERR_MSG(extack, "Invalid route type");
 			goto err_inval;
 		}
 	}
 
-	if (cfg->fc_scope > RT_SCOPE_HOST)
+	if (cfg->fc_scope > RT_SCOPE_HOST) {
+		NL_SET_ERR_MSG(extack, "Invalid scope");
 		goto err_inval;
+	}
 
 	if (cfg->fc_scope == RT_SCOPE_HOST) {
 		struct fib_nh *nh = fi->fib_nh;
 
 		/* Local address is added. */
-		if (nhs != 1 || nh->nh_gw)
+		if (nhs != 1) {
+			NL_SET_ERR_MSG(extack,
+				       "Route with host scope can not have multiple nexthops");
+			goto err_inval;
+		}
+		if (nh->nh_gw) {
+			NL_SET_ERR_MSG(extack,
+				       "Route with host scope can not have a gateway");
 			goto err_inval;
+		}
 		nh->nh_scope = RT_SCOPE_NOWHERE;
 		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
 		err = -ENODEV;
@@ -1143,7 +1218,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		int linkdown = 0;
 
 		change_nexthops(fi) {
-			err = fib_check_nh(cfg, fi, nexthop_nh);
+			err = fib_check_nh(cfg, fi, nexthop_nh, extack);
 			if (err != 0)
 				goto failure;
 			if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
@@ -1153,8 +1228,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 			fi->fib_flags |= RTNH_F_LINKDOWN;
 	}
 
-	if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc))
+	if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
+		NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
 		goto err_inval;
+	}
 
 	change_nexthops(fi) {
 		fib_info_update_nh_saddr(net, nexthop_nh);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 51182ff2b441..d56659e97a6e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1099,9 +1099,25 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp,
 	return 0;
 }
 
+static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
+{
+	if (plen > KEYLENGTH) {
+		NL_SET_ERR_MSG(extack, "Invalid prefix length");
+		return false;
+	}
+
+	if ((plen < KEYLENGTH) && (key << plen)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid prefix for given prefix length");
+		return false;
+	}
+
+	return true;
+}
+
 /* Caller must hold RTNL. */
 int fib_table_insert(struct net *net, struct fib_table *tb,
-		     struct fib_config *cfg)
+		     struct fib_config *cfg, struct netlink_ext_ack *extack)
 {
 	enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
 	struct trie *t = (struct trie *)tb->tb_data;
@@ -1115,17 +1131,14 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
 	u32 key;
 	int err;
 
-	if (plen > KEYLENGTH)
-		return -EINVAL;
-
 	key = ntohl(cfg->fc_dst);
 
-	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
-
-	if ((plen < KEYLENGTH) && (key << plen))
+	if (!fib_valid_key_len(key, plen, extack))
 		return -EINVAL;
 
-	fi = fib_create_info(cfg);
+	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
+
+	fi = fib_create_info(cfg, extack);
 	if (IS_ERR(fi)) {
 		err = PTR_ERR(fi);
 		goto err;
@@ -1452,6 +1465,7 @@ found:
 			if (!(fib_flags & FIB_LOOKUP_NOREF))
 				atomic_inc(&fi->fib_clntref);
 
+			res->prefix = htonl(n->key);
 			res->prefixlen = KEYLENGTH - fa->fa_slen;
 			res->nh_sel = nhsel;
 			res->type = fa->fa_type;
@@ -1507,7 +1521,7 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp,
 
 /* Caller must hold RTNL. */
 int fib_table_delete(struct net *net, struct fib_table *tb,
-		     struct fib_config *cfg)
+		     struct fib_config *cfg, struct netlink_ext_ack *extack)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *fa_to_delete;
@@ -1517,12 +1531,9 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
 	u8 tos = cfg->fc_tos;
 	u32 key;
 
-	if (plen > KEYLENGTH)
-		return -EINVAL;
-
 	key = ntohl(cfg->fc_dst);
 
-	if ((plen < KEYLENGTH) && (key << plen))
+	if (!fib_valid_key_len(key, plen, extack))
 		return -EINVAL;
 
 	l = fib_find_node(t, &tp, key);
@@ -1551,7 +1562,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
 		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
 		    (!cfg->fc_protocol ||
 		     fi->fib_protocol == cfg->fc_protocol) &&
-		    fib_nh_match(cfg, fi) == 0) {
+		    fib_nh_match(cfg, fi, extack) == 0) {
 			fa_to_delete = fa;
 			break;
 		}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 805f6607f8d9..8e0257d01200 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <net/genetlink.h>
 #include <net/gue.h>
+#include <net/fou.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/udp.h>
@@ -859,25 +860,6 @@ size_t gue_encap_hlen(struct ip_tunnel_encap *e)
 }
 EXPORT_SYMBOL(gue_encap_hlen);
 
-static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
-			  struct flowi4 *fl4, u8 *protocol, __be16 sport)
-{
-	struct udphdr *uh;
-
-	skb_push(skb, sizeof(struct udphdr));
-	skb_reset_transport_header(skb);
-
-	uh = udp_hdr(skb);
-
-	uh->dest = e->dport;
-	uh->source = sport;
-	uh->len = htons(skb->len);
-	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
-		     fl4->saddr, fl4->daddr, skb->len);
-
-	*protocol = IPPROTO_UDP;
-}
-
 int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 		       u8 *protocol, __be16 *sport, int type)
 {
@@ -894,24 +876,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 }
 EXPORT_SYMBOL(__fou_build_header);
 
-int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
-		     u8 *protocol, struct flowi4 *fl4)
-{
-	int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
-						       SKB_GSO_UDP_TUNNEL;
-	__be16 sport;
-	int err;
-
-	err = __fou_build_header(skb, e, protocol, &sport, type);
-	if (err)
-		return err;
-
-	fou_build_udp(skb, e, fl4, protocol, sport);
-
-	return 0;
-}
-EXPORT_SYMBOL(fou_build_header);
-
 int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 		       u8 *protocol, __be16 *sport, int type)
 {
@@ -985,8 +949,46 @@ int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 }
 EXPORT_SYMBOL(__gue_build_header);
 
-int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
-		     u8 *protocol, struct flowi4 *fl4)
+#ifdef CONFIG_NET_FOU_IP_TUNNELS
+
+static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
+			  struct flowi4 *fl4, u8 *protocol, __be16 sport)
+{
+	struct udphdr *uh;
+
+	skb_push(skb, sizeof(struct udphdr));
+	skb_reset_transport_header(skb);
+
+	uh = udp_hdr(skb);
+
+	uh->dest = e->dport;
+	uh->source = sport;
+	uh->len = htons(skb->len);
+	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+		     fl4->saddr, fl4->daddr, skb->len);
+
+	*protocol = IPPROTO_UDP;
+}
+
+static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+			    u8 *protocol, struct flowi4 *fl4)
+{
+	int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
+						       SKB_GSO_UDP_TUNNEL;
+	__be16 sport;
+	int err;
+
+	err = __fou_build_header(skb, e, protocol, &sport, type);
+	if (err)
+		return err;
+
+	fou_build_udp(skb, e, fl4, protocol, sport);
+
+	return 0;
+}
+
+static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+			    u8 *protocol, struct flowi4 *fl4)
 {
 	int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
 						       SKB_GSO_UDP_TUNNEL;
@@ -1001,9 +1003,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 
 	return 0;
 }
-EXPORT_SYMBOL(gue_build_header);
 
-#ifdef CONFIG_NET_FOU_IP_TUNNELS
 
 static const struct ip_tunnel_encap_ops fou_iptun_ops = {
 	.encap_hlen = fou_encap_hlen,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 43318b5f5647..5610971bf859 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -489,7 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
 
 	security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
-	rt = __ip_route_output_key_hash(net, fl4, skb_in);
+	rt = ip_route_output_key_hash(net, fl4, skb_in);
 	if (IS_ERR(rt))
 		return rt;
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1054d330bf9d..a3fa1a5b6d98 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -25,6 +25,7 @@
 #include <net/xfrm.h>
 #include <net/tcp.h>
 #include <net/sock_reuseport.h>
+#include <net/addrconf.h>
 
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -789,7 +790,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
 		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
 		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
-		newsk->sk_write_space = sk_stream_write_space;
 
 		/* listeners have SOCK_RCU_FREE, not the children */
 		sock_reset_flag(newsk, SOCK_RCU_FREE);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index baf196eaf1d8..90e11479c725 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -228,14 +228,16 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
 
 static int ip_tun_build_state(struct nlattr *attr,
 			      unsigned int family, const void *cfg,
-			      struct lwtunnel_state **ts)
+			      struct lwtunnel_state **ts,
+			      struct netlink_ext_ack *extack)
 {
 	struct ip_tunnel_info *tun_info;
 	struct lwtunnel_state *new_state;
 	struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
 	int err;
 
-	err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, NULL);
+	err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy,
+			       extack);
 	if (err < 0)
 		return err;
 
@@ -325,7 +327,8 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
 
 static int ip6_tun_build_state(struct nlattr *attr,
 			       unsigned int family, const void *cfg,
-			       struct lwtunnel_state **ts)
+			       struct lwtunnel_state **ts,
+			       struct netlink_ext_ack *extack)
 {
 	struct ip_tunnel_info *tun_info;
 	struct lwtunnel_state *new_state;
@@ -333,7 +336,7 @@ static int ip6_tun_build_state(struct nlattr *attr,
 	int err;
 
 	err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy,
-			       NULL);
+			       extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 7cd8d0d918f8..6f8d9e5e062b 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -172,7 +172,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
 	struct iphdr *iph = ip_hdr(skb_in);
 	u8 proto;
 
-	if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
+	if (iph->frag_off & htons(IP_OFFSET))
 		return;
 
 	if (skb_csum_unnecessary(skb_in)) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6883b3d4ba8f..9b38cf18144e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -114,6 +114,8 @@
 #include <net/ip_tunnels.h>
 #include <net/l3mdev.h>
 
+#include "fib_lookup.h"
+
 #define RT_FL_TOS(oldflp4) \
 	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
 
@@ -1860,9 +1862,9 @@ static int ip_mkroute_input(struct sk_buff *skb,
  */
 
 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-			       u8 tos, struct net_device *dev)
+			       u8 tos, struct net_device *dev,
+			       struct fib_result *res)
 {
-	struct fib_result res;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct ip_tunnel_info *tun_info;
 	struct flowi4	fl4;
@@ -1892,8 +1894,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
 		goto martian_source;
 
-	res.fi = NULL;
-	res.table = NULL;
+	res->fi = NULL;
+	res->table = NULL;
 	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
 		goto brd_input;
 
@@ -1929,17 +1931,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
 	fl4.flowi4_uid = sock_net_uid(net, NULL);
-	err = fib_lookup(net, &fl4, &res, 0);
+	err = fib_lookup(net, &fl4, res, 0);
 	if (err != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
 			err = -EHOSTUNREACH;
 		goto no_route;
 	}
 
-	if (res.type == RTN_BROADCAST)
+	if (res->type == RTN_BROADCAST)
 		goto brd_input;
 
-	if (res.type == RTN_LOCAL) {
+	if (res->type == RTN_LOCAL) {
 		err = fib_validate_source(skb, saddr, daddr, tos,
 					  0, dev, in_dev, &itag);
 		if (err < 0)
@@ -1951,10 +1953,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		err = -EHOSTUNREACH;
 		goto no_route;
 	}
-	if (res.type != RTN_UNICAST)
+	if (res->type != RTN_UNICAST)
 		goto martian_destination;
 
-	err = ip_mkroute_input(skb, &res, in_dev, daddr, saddr, tos);
+	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
 out:	return err;
 
 brd_input:
@@ -1968,14 +1970,14 @@ brd_input:
 			goto martian_source;
 	}
 	flags |= RTCF_BROADCAST;
-	res.type = RTN_BROADCAST;
+	res->type = RTN_BROADCAST;
 	RT_CACHE_STAT_INC(in_brd);
 
 local_input:
 	do_cache = false;
-	if (res.fi) {
+	if (res->fi) {
 		if (!itag) {
-			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
+			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
 			if (rt_cache_valid(rth)) {
 				skb_dst_set_noref(skb, &rth->dst);
 				err = 0;
@@ -1986,7 +1988,7 @@ local_input:
 	}
 
 	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
-			   flags | RTCF_LOCAL, res.type,
+			   flags | RTCF_LOCAL, res->type,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
 	if (!rth)
 		goto e_nobufs;
@@ -1996,18 +1998,18 @@ local_input:
 	rth->dst.tclassid = itag;
 #endif
 	rth->rt_is_input = 1;
-	if (res.table)
-		rth->rt_table_id = res.table->tb_id;
+	if (res->table)
+		rth->rt_table_id = res->table->tb_id;
 
 	RT_CACHE_STAT_INC(in_slow_tot);
-	if (res.type == RTN_UNREACHABLE) {
+	if (res->type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
 		rth->dst.error= -err;
 		rth->rt_flags 	&= ~RTCF_LOCAL;
 	}
 
 	if (do_cache) {
-		struct fib_nh *nh = &FIB_RES_NH(res);
+		struct fib_nh *nh = &FIB_RES_NH(*res);
 
 		rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
 		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
@@ -2027,9 +2029,9 @@ local_input:
 
 no_route:
 	RT_CACHE_STAT_INC(in_no_route);
-	res.type = RTN_UNREACHABLE;
-	res.fi = NULL;
-	res.table = NULL;
+	res->type = RTN_UNREACHABLE;
+	res->fi = NULL;
+	res->table = NULL;
 	goto local_input;
 
 	/*
@@ -2059,11 +2061,22 @@ martian_source:
 int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			 u8 tos, struct net_device *dev)
 {
-	int res;
+	struct fib_result res;
+	int err;
 
 	tos &= IPTOS_RT_MASK;
 	rcu_read_lock();
+	err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+	rcu_read_unlock();
 
+	return err;
+}
+EXPORT_SYMBOL(ip_route_input_noref);
+
+/* called with rcu_read_lock held */
+int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+		       u8 tos, struct net_device *dev, struct fib_result *res)
+{
 	/* Multicast recognition logic is moved from route cache to here.
 	   The problem was that too many Ethernet cards have broken/missing
 	   hardware multicast filters :-( As result the host on multicasting
@@ -2078,6 +2091,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (ipv4_is_multicast(daddr)) {
 		struct in_device *in_dev = __in_dev_get_rcu(dev);
 		int our = 0;
+		int err = -EINVAL;
 
 		if (in_dev)
 			our = ip_check_mc_rcu(in_dev, daddr, saddr,
@@ -2093,7 +2107,6 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 						      ip_hdr(skb)->protocol);
 		}
 
-		res = -EINVAL;
 		if (our
 #ifdef CONFIG_IP_MROUTE
 			||
@@ -2101,17 +2114,14 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		     IN_DEV_MFORWARD(in_dev))
 #endif
 		   ) {
-			res = ip_route_input_mc(skb, daddr, saddr,
+			err = ip_route_input_mc(skb, daddr, saddr,
 						tos, dev, our);
 		}
-		rcu_read_unlock();
-		return res;
+		return err;
 	}
-	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
-	rcu_read_unlock();
-	return res;
+
+	return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
 }
-EXPORT_SYMBOL(ip_route_input_noref);
 
 /* called with rcu_read_lock() */
 static struct rtable *__mkroute_output(const struct fib_result *res,
@@ -2254,29 +2264,40 @@ add:
  * Major route resolver routine.
  */
 
-struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
-					  const struct sk_buff *skb)
+struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
+					const struct sk_buff *skb)
 {
-	struct net_device *dev_out = NULL;
 	__u8 tos = RT_FL_TOS(fl4);
-	unsigned int flags = 0;
 	struct fib_result res;
 	struct rtable *rth;
-	int orig_oif;
-	int err = -ENETUNREACH;
 
 	res.tclassid	= 0;
 	res.fi		= NULL;
 	res.table	= NULL;
 
-	orig_oif = fl4->flowi4_oif;
-
 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
 	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
 	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
 			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
 
 	rcu_read_lock();
+	rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
+	rcu_read_unlock();
+
+	return rth;
+}
+EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
+
+struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
+					    struct fib_result *res,
+					    const struct sk_buff *skb)
+{
+	struct net_device *dev_out = NULL;
+	int orig_oif = fl4->flowi4_oif;
+	unsigned int flags = 0;
+	struct rtable *rth;
+	int err = -ENETUNREACH;
+
 	if (fl4->saddr) {
 		rth = ERR_PTR(-EINVAL);
 		if (ipv4_is_multicast(fl4->saddr) ||
@@ -2362,15 +2383,15 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
 			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
 		dev_out = net->loopback_dev;
 		fl4->flowi4_oif = LOOPBACK_IFINDEX;
-		res.type = RTN_LOCAL;
+		res->type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
 
-	err = fib_lookup(net, fl4, &res, 0);
+	err = fib_lookup(net, fl4, res, 0);
 	if (err) {
-		res.fi = NULL;
-		res.table = NULL;
+		res->fi = NULL;
+		res->table = NULL;
 		if (fl4->flowi4_oif &&
 		    (ipv4_is_multicast(fl4->daddr) ||
 		    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
@@ -2395,43 +2416,41 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
 			if (fl4->saddr == 0)
 				fl4->saddr = inet_select_addr(dev_out, 0,
 							      RT_SCOPE_LINK);
-			res.type = RTN_UNICAST;
+			res->type = RTN_UNICAST;
 			goto make_route;
 		}
 		rth = ERR_PTR(err);
 		goto out;
 	}
 
-	if (res.type == RTN_LOCAL) {
+	if (res->type == RTN_LOCAL) {
 		if (!fl4->saddr) {
-			if (res.fi->fib_prefsrc)
-				fl4->saddr = res.fi->fib_prefsrc;
+			if (res->fi->fib_prefsrc)
+				fl4->saddr = res->fi->fib_prefsrc;
 			else
 				fl4->saddr = fl4->daddr;
 		}
 
 		/* L3 master device is the loopback for that domain */
-		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? :
+		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
 			net->loopback_dev;
 		fl4->flowi4_oif = dev_out->ifindex;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
 
-	fib_select_path(net, &res, fl4, skb);
+	fib_select_path(net, res, fl4, skb);
 
-	dev_out = FIB_RES_DEV(res);
+	dev_out = FIB_RES_DEV(*res);
 	fl4->flowi4_oif = dev_out->ifindex;
 
 
 make_route:
-	rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
+	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
 
 out:
-	rcu_read_unlock();
 	return rth;
 }
-EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
 
 static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
 {
@@ -2525,9 +2544,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 }
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
+/* called with rcu_read_lock held */
 static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
 			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
-			u32 seq, int event)
+			u32 seq)
 {
 	struct rtable *rt = skb_rtable(skb);
 	struct rtmsg *r;
@@ -2536,7 +2556,7 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
 	u32 error;
 	u32 metrics[RTAX_MAX];
 
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), 0);
+	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
 	if (!nlh)
 		return -EMSGSIZE;
 
@@ -2644,6 +2664,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(in_skb->sk);
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
+	struct fib_result res = {};
 	struct rtable *rt = NULL;
 	struct flowi4 fl4;
 	__be32 dst = 0;
@@ -2700,10 +2721,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	fl4.flowi4_mark = mark;
 	fl4.flowi4_uid = uid;
 
+	rcu_read_lock();
+
 	if (iif) {
 		struct net_device *dev;
 
-		dev = __dev_get_by_index(net, iif);
+		dev = dev_get_by_index_rcu(net, iif);
 		if (!dev) {
 			err = -ENODEV;
 			goto errout_free;
@@ -2712,14 +2735,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		skb->protocol	= htons(ETH_P_IP);
 		skb->dev	= dev;
 		skb->mark	= mark;
-		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
+		err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
+					 dev, &res);
 
 		rt = skb_rtable(skb);
 		if (err == 0 && rt->dst.error)
 			err = -rt->dst.error;
 	} else {
-		rt = ip_route_output_key(net, &fl4);
-
+		rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
 		err = 0;
 		if (IS_ERR(rt))
 			err = PTR_ERR(rt);
@@ -2735,17 +2758,25 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
 		table_id = rt->rt_table_id;
 
-	err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
-			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
-			   RTM_NEWROUTE);
+	if (rtm->rtm_flags & RTM_F_FIB_MATCH)
+		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
+				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
+				    rt->rt_type, res.prefix, res.prefixlen,
+				    fl4.flowi4_tos, res.fi, 0);
+	else
+		err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
+				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
 	if (err < 0)
 		goto errout_free;
 
+	rcu_read_unlock();
+
 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 errout:
 	return err;
 
 errout_free:
+	rcu_read_unlock();
 	kfree_skb(skb);
 	goto errout;
 }
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 0257d965f111..6426250a58ea 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -66,10 +66,10 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
  * Since subsequent timestamps use the normal tcp_time_stamp value, we
  * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
  */
-__u32 cookie_init_timestamp(struct request_sock *req)
+u64 cookie_init_timestamp(struct request_sock *req)
 {
 	struct inet_request_sock *ireq;
-	u32 ts, ts_now = tcp_time_stamp;
+	u32 ts, ts_now = tcp_time_stamp_raw();
 	u32 options = 0;
 
 	ireq = inet_rsk(req);
@@ -88,7 +88,7 @@ __u32 cookie_init_timestamp(struct request_sock *req)
 		ts <<= TSBITS;
 		ts |= options;
 	}
-	return ts;
+	return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ);
 }
 
 
@@ -343,7 +343,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-	treq->snt_synack.v64	= 0;
+	treq->snt_synack	= 0;
 	treq->tfo_listener	= false;
 
 	ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b5ea036ca781..87981fcdfcf2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -386,7 +386,7 @@ void tcp_init_sock(struct sock *sk)
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
-	minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U);
+	minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
 
 	/* So many TCP implementations out there (incorrectly) count the
 	 * initial SYN frame in their delayed-ACK and congestion control
@@ -2186,7 +2186,7 @@ adjudge_to_death:
 
 
 	/* Now socket is owned by kernel and we acquire BH lock
-	   to finish close. No need to check for user refs.
+	 *  to finish close. No need to check for user refs.
 	 */
 	local_bh_disable();
 	bh_lock_sock(sk);
@@ -2480,7 +2480,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_MAXSEG:
 		/* Values greater than interface MTU won't take effect. However
 		 * at the point when this call is done we typically don't yet
-		 * know which interface is going to be used */
+		 * know which interface is going to be used
+		 */
 		if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
 			err = -EINVAL;
 			break;
@@ -2715,7 +2716,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		if (!tp->repair)
 			err = -EPERM;
 		else
-			tp->tsoffset = val - tcp_time_stamp;
+			tp->tsoffset = val - tcp_time_stamp_raw();
 		break;
 	case TCP_REPAIR_WINDOW:
 		err = tcp_repair_set_window(tp, optval, optlen);
@@ -2766,7 +2767,7 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
 	for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
 		stats[i] = tp->chrono_stat[i - 1];
 		if (i == tp->chrono_type)
-			stats[i] += tcp_time_stamp - tp->chrono_start;
+			stats[i] += tcp_jiffies32 - tp->chrono_start;
 		stats[i] *= USEC_PER_SEC / HZ;
 		total += stats[i];
 	}
@@ -2850,7 +2851,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_retrans = tp->retrans_out;
 	info->tcpi_fackets = tp->fackets_out;
 
-	now = tcp_time_stamp;
+	now = tcp_jiffies32;
 	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
 	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
 	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
@@ -3081,7 +3082,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_TIMESTAMP:
-		val = tcp_time_stamp + tp->tsoffset;
+		val = tcp_time_stamp_raw() + tp->tsoffset;
 		break;
 	case TCP_NOTSENT_LOWAT:
 		val = tp->notsent_lowat;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index b89bce4c721e..dbcc9352a48f 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -52,10 +52,9 @@
  * There is a public e-mail list for discussing BBR development and testing:
  *   https://groups.google.com/forum/#!forum/bbr-dev
  *
- * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled,
- * since pacing is integral to the BBR design and implementation.
- * BBR without pacing would not function properly, and may incur unnecessary
- * high packet loss rates.
+ * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled,
+ * otherwise TCP stack falls back to an internal pacing using one high
+ * resolution timer per TCP socket and may use more resources.
  */
 #include <linux/module.h>
 #include <net/tcp.h>
@@ -92,7 +91,7 @@ struct bbr {
 	struct minmax bw;	/* Max recent delivery rate in pkts/uS << 24 */
 	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
 	u32     next_rtt_delivered; /* scb->tx.delivered at end of round */
-	struct skb_mstamp cycle_mstamp;  /* time of this cycle phase start */
+	u64	cycle_mstamp;	     /* time of this cycle phase start */
 	u32     mode:3,		     /* current bbr_mode in state machine */
 		prev_ca_state:3,     /* CA state on previous ACK */
 		packet_conservation:1,  /* use packet conservation? */
@@ -412,7 +411,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
 	bool is_full_length =
-		skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) >
+		tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
 		bbr->min_rtt_us;
 	u32 inflight, bw;
 
@@ -498,7 +497,7 @@ static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
 
-	bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies;
+	bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
 	bbr->lt_last_delivered = tp->delivered;
 	bbr->lt_last_lost = tp->lost;
 	bbr->lt_rtt_cnt = 0;
@@ -552,7 +551,7 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
 	struct bbr *bbr = inet_csk_ca(sk);
 	u32 lost, delivered;
 	u64 bw;
-	s32 t;
+	u32 t;
 
 	if (bbr->lt_use_bw) {	/* already using long-term rate, lt_bw? */
 		if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
@@ -604,15 +603,15 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
 		return;
 
 	/* Find average delivery rate in this sampling interval. */
-	t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp);
-	if (t < 1)
-		return;		/* interval is less than one jiffy, so wait */
-	t = jiffies_to_usecs(t);
-	/* Interval long enough for jiffies_to_usecs() to return a bogus 0? */
-	if (t < 1) {
+	t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
+	if ((s32)t < 1)
+		return;		/* interval is less than one ms, so wait */
+	/* Check if can multiply without overflow */
+	if (t >= ~0U / USEC_PER_MSEC) {
 		bbr_reset_lt_bw_sampling(sk);  /* interval too long; reset */
 		return;
 	}
+	t *= USEC_PER_MSEC;
 	bw = (u64)delivered * BW_UNIT;
 	do_div(bw, t);
 	bbr_lt_bw_interval_done(sk, bw);
@@ -731,12 +730,12 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 	bool filter_expired;
 
 	/* Track min RTT seen in the min_rtt_win_sec filter window: */
-	filter_expired = after(tcp_time_stamp,
+	filter_expired = after(tcp_jiffies32,
 			       bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
 	if (rs->rtt_us >= 0 &&
 	    (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
 		bbr->min_rtt_us = rs->rtt_us;
-		bbr->min_rtt_stamp = tcp_time_stamp;
+		bbr->min_rtt_stamp = tcp_jiffies32;
 	}
 
 	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
@@ -755,7 +754,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 		/* Maintain min packets in flight for max(200 ms, 1 round). */
 		if (!bbr->probe_rtt_done_stamp &&
 		    tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
-			bbr->probe_rtt_done_stamp = tcp_time_stamp +
+			bbr->probe_rtt_done_stamp = tcp_jiffies32 +
 				msecs_to_jiffies(bbr_probe_rtt_mode_ms);
 			bbr->probe_rtt_round_done = 0;
 			bbr->next_rtt_delivered = tp->delivered;
@@ -763,8 +762,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 			if (bbr->round_start)
 				bbr->probe_rtt_round_done = 1;
 			if (bbr->probe_rtt_round_done &&
-			    after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) {
-				bbr->min_rtt_stamp = tcp_time_stamp;
+			    after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) {
+				bbr->min_rtt_stamp = tcp_jiffies32;
 				bbr->restore_cwnd = 1;  /* snap to prior_cwnd */
 				bbr_reset_mode(sk);
 			}
@@ -811,7 +810,7 @@ static void bbr_init(struct sock *sk)
 	bbr->probe_rtt_done_stamp = 0;
 	bbr->probe_rtt_round_done = 0;
 	bbr->min_rtt_us = tcp_min_rtt(tp);
-	bbr->min_rtt_stamp = tcp_time_stamp;
+	bbr->min_rtt_stamp = tcp_jiffies32;
 
 	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
 
@@ -826,10 +825,12 @@ static void bbr_init(struct sock *sk)
 	bbr->idle_restart = 0;
 	bbr->full_bw = 0;
 	bbr->full_bw_cnt = 0;
-	bbr->cycle_mstamp.v64 = 0;
+	bbr->cycle_mstamp = 0;
 	bbr->cycle_idx = 0;
 	bbr_reset_lt_bw_sampling(sk);
 	bbr_reset_startup_mode(sk);
+
+	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
 }
 
 static u32 bbr_sndbuf_expand(struct sock *sk)
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 36087bca9f48..609965f0e298 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -84,14 +84,14 @@ static void bictcp_init(struct sock *sk)
 static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 {
 	if (ca->last_cwnd == cwnd &&
-	    (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
+	    (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
 		return;
 
 	ca->last_cwnd = cwnd;
-	ca->last_time = tcp_time_stamp;
+	ca->last_time = tcp_jiffies32;
 
 	if (ca->epoch_start == 0) /* record the beginning of an epoch */
-		ca->epoch_start = tcp_time_stamp;
+		ca->epoch_start = tcp_jiffies32;
 
 	/* start off normal */
 	if (cwnd <= low_window) {
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0683ba447d77..57ae5b5ae643 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -155,7 +155,7 @@ static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 {
 	if (event == CA_EVENT_TX_START) {
 		struct bictcp *ca = inet_csk_ca(sk);
-		u32 now = tcp_time_stamp;
+		u32 now = tcp_jiffies32;
 		s32 delta;
 
 		delta = now - tcp_sk(sk)->lsndtime;
@@ -231,21 +231,21 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
 	ca->ack_cnt += acked;	/* count the number of ACKed packets */
 
 	if (ca->last_cwnd == cwnd &&
-	    (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
+	    (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
 		return;
 
 	/* The CUBIC function can update ca->cnt at most once per jiffy.
 	 * On all cwnd reduction events, ca->epoch_start is set to 0,
 	 * which will force a recalculation of ca->cnt.
 	 */
-	if (ca->epoch_start && tcp_time_stamp == ca->last_time)
+	if (ca->epoch_start && tcp_jiffies32 == ca->last_time)
 		goto tcp_friendliness;
 
 	ca->last_cwnd = cwnd;
-	ca->last_time = tcp_time_stamp;
+	ca->last_time = tcp_jiffies32;
 
 	if (ca->epoch_start == 0) {
-		ca->epoch_start = tcp_time_stamp;	/* record beginning */
+		ca->epoch_start = tcp_jiffies32;	/* record beginning */
 		ca->ack_cnt = acked;			/* start counting */
 		ca->tcp_cwnd = cwnd;			/* syn with cubic */
 
@@ -276,7 +276,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
 	 * if the cwnd < 1 million packets !!!
 	 */
 
-	t = (s32)(tcp_time_stamp - ca->epoch_start);
+	t = (s32)(tcp_jiffies32 - ca->epoch_start);
 	t += msecs_to_jiffies(ca->delay_min >> 3);
 	/* change the unit from HZ to bictcp_HZ */
 	t <<= BICTCP_HZ;
@@ -448,7 +448,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
 		return;
 
 	/* Discard delay samples right after fast recovery */
-	if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+	if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
 		return;
 
 	delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 4a4d8e76738f..3eb78cde6ff0 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -104,7 +104,7 @@ static void measure_achieved_throughput(struct sock *sk,
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
-	u32 now = tcp_time_stamp;
+	u32 now = tcp_jiffies32;
 
 	if (icsk->icsk_ca_state == TCP_CA_Open)
 		ca->pkts_acked = sample->pkts_acked;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 174d4376baa5..4ea8ec5c7bb4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -112,6 +112,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
 #define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
 #define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
+#define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -441,7 +442,7 @@ void tcp_init_buffer_space(struct sock *sk)
 		tcp_sndbuf_expand(sk);
 
 	tp->rcvq_space.space = tp->rcv_wnd;
-	skb_mstamp_get(&tp->tcp_mstamp);
+	tcp_mstamp_refresh(tp);
 	tp->rcvq_space.time = tp->tcp_mstamp;
 	tp->rcvq_space.seq = tp->copied_seq;
 
@@ -463,7 +464,7 @@ void tcp_init_buffer_space(struct sock *sk)
 		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
 
 	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->snd_cwnd_stamp = tcp_jiffies32;
 }
 
 /* 5. Recalculate window clamp after socket hit its memory bounds. */
@@ -555,11 +556,11 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
 {
 	u32 delta_us;
 
-	if (tp->rcv_rtt_est.time.v64 == 0)
+	if (tp->rcv_rtt_est.time == 0)
 		goto new_measure;
 	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
 		return;
-	delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
+	delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
 	tcp_rcv_rtt_update(tp, delta_us, 1);
 
 new_measure:
@@ -571,13 +572,15 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 					  const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tp->rx_opt.rcv_tsecr &&
 	    (TCP_SKB_CB(skb)->end_seq -
-	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
-		tcp_rcv_rtt_update(tp,
-				   jiffies_to_usecs(tcp_time_stamp -
-						    tp->rx_opt.rcv_tsecr),
-				   0);
+	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+		u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+		tcp_rcv_rtt_update(tp, delta_us, 0);
+	}
 }
 
 /*
@@ -590,7 +593,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	int time;
 	int copied;
 
-	time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
+	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
 		return;
 
@@ -672,7 +675,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 
 	tcp_rcv_rtt_measure(tp);
 
-	now = tcp_time_stamp;
+	now = tcp_jiffies32;
 
 	if (!icsk->icsk_ack.ato) {
 		/* The _first_ data packet received, initialize
@@ -885,6 +888,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 	struct tcp_sock *tp = tcp_sk(sk);
 	int mib_idx;
 
+	if (WARN_ON_ONCE(metric < 0))
+		return;
+
 	if (metric > tp->reordering) {
 		tp->reordering = min(sysctl_tcp_max_reordering, metric);
 
@@ -1134,8 +1140,8 @@ struct tcp_sacktag_state {
 	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
 	 * but congestion control should still get an accurate delay signal.
 	 */
-	struct skb_mstamp first_sackt;
-	struct skb_mstamp last_sackt;
+	u64	first_sackt;
+	u64	last_sackt;
 	struct rate_sample *rate;
 	int	flag;
 };
@@ -1200,7 +1206,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
 			  u32 start_seq, u32 end_seq,
 			  int dup_sack, int pcount,
-			  const struct skb_mstamp *xmit_time)
+			  u64 xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fack_count = state->fack_count;
@@ -1242,9 +1248,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
 							   state->reord);
 				if (!after(end_seq, tp->high_seq))
 					state->flag |= FLAG_ORIG_SACK_ACKED;
-				if (state->first_sackt.v64 == 0)
-					state->first_sackt = *xmit_time;
-				state->last_sackt = *xmit_time;
+				if (state->first_sackt == 0)
+					state->first_sackt = xmit_time;
+				state->last_sackt = xmit_time;
 			}
 
 			if (sacked & TCPCB_LOST) {
@@ -1304,7 +1310,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
 			start_seq, end_seq, dup_sack, pcount,
-			&skb->skb_mstamp);
+			skb->skb_mstamp);
 	tcp_rate_skb_delivered(sk, skb, state->rate);
 
 	if (skb == tp->lost_skb_hint)
@@ -1356,8 +1362,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		tcp_advance_highest_sack(sk, skb);
 
 	tcp_skb_collapse_tstamp(prev, skb);
-	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
-		TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
+	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
+		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
 
 	tcp_unlink_write_queue(skb, sk);
 	sk_wmem_free_skb(sk, skb);
@@ -1587,7 +1593,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
 						tcp_skb_pcount(skb),
-						&skb->skb_mstamp);
+						skb->skb_mstamp);
 			tcp_rate_skb_delivered(sk, skb, state->rate);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
@@ -1954,7 +1960,7 @@ void tcp_enter_loss(struct sock *sk)
 	}
 	tp->snd_cwnd	   = 1;
 	tp->snd_cwnd_cnt   = 0;
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->snd_cwnd_stamp = tcp_jiffies32;
 
 	tp->retrans_out = 0;
 	tp->lost_out = 0;
@@ -2383,7 +2389,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 			tcp_ecn_withdraw_cwr(tp);
 		}
 	}
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->snd_cwnd_stamp = tcp_jiffies32;
 	tp->undo_marker = 0;
 }
 
@@ -2520,7 +2526,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
 	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
 	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
 		tp->snd_cwnd = tp->snd_ssthresh;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
+		tp->snd_cwnd_stamp = tcp_jiffies32;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
@@ -2590,7 +2596,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
 		       tcp_mss_to_mtu(sk, tp->mss_cache) /
 		       icsk->icsk_mtup.probe_size;
 	tp->snd_cwnd_cnt = 0;
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->snd_cwnd_stamp = tcp_jiffies32;
 	tp->snd_ssthresh = tcp_current_ssthresh(sk);
 
 	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
@@ -2911,13 +2917,13 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
 
-	minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
+	minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
 			   rtt_us ? : jiffies_to_usecs(1));
 }
 
-static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      long seq_rtt_us, long sack_rtt_us,
-				      long ca_rtt_us)
+static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
+			       long seq_rtt_us, long sack_rtt_us,
+			       long ca_rtt_us, struct rate_sample *rs)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2936,9 +2942,13 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 	 */
 	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
-	    flag & FLAG_ACKED)
-		seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp -
-							  tp->rx_opt.rcv_tsecr);
+	    flag & FLAG_ACKED) {
+		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+		u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+		seq_rtt_us = ca_rtt_us = delta_us;
+	}
+	rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
 	if (seq_rtt_us < 0)
 		return false;
 
@@ -2958,16 +2968,13 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
 {
+	struct rate_sample rs;
 	long rtt_us = -1L;
 
-	if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
-		struct skb_mstamp now;
-
-		skb_mstamp_get(&now);
-		rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
-	}
+	if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
+		rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
 
-	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us);
+	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
 }
 
 
@@ -2976,7 +2983,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
-	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
+	tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
 }
 
 /* Restart timer after forward progress on connection.
@@ -3001,14 +3008,14 @@ void tcp_rearm_rto(struct sock *sk)
 		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 			struct sk_buff *skb = tcp_write_queue_head(sk);
-			const u32 rto_time_stamp =
-				tcp_skb_timestamp(skb) + rto;
-			s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
-			/* delta may not be positive if the socket is locked
+			u64 rto_time_stamp = skb->skb_mstamp +
+					     jiffies_to_usecs(rto);
+			s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+			/* delta_us may not be positive if the socket is locked
 			 * when the retrans timer fires and is rescheduled.
 			 */
-			if (delta > 0)
-				rto = delta;
+			if (delta_us > 0)
+				rto = usecs_to_jiffies(delta_us);
 		}
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
 					  TCP_RTO_MAX);
@@ -3060,9 +3067,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			       struct tcp_sacktag_state *sack)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct skb_mstamp first_ackt, last_ackt;
+	u64 first_ackt, last_ackt;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct skb_mstamp *now = &tp->tcp_mstamp;
 	u32 prior_sacked = tp->sacked_out;
 	u32 reord = tp->packets_out;
 	bool fully_acked = true;
@@ -3075,7 +3081,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	bool rtt_update;
 	int flag = 0;
 
-	first_ackt.v64 = 0;
+	first_ackt = 0;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -3106,8 +3112,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			flag |= FLAG_RETRANS_DATA_ACKED;
 		} else if (!(sacked & TCPCB_SACKED_ACKED)) {
 			last_ackt = skb->skb_mstamp;
-			WARN_ON_ONCE(last_ackt.v64 == 0);
-			if (!first_ackt.v64)
+			WARN_ON_ONCE(last_ackt == 0);
+			if (!first_ackt)
 				first_ackt = last_ackt;
 
 			last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
@@ -3122,7 +3128,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			tp->delivered += acked_pcount;
 			if (!tcp_skb_spurious_retrans(tp, skb))
 				tcp_rack_advance(tp, sacked, scb->end_seq,
-						 &skb->skb_mstamp);
+						 skb->skb_mstamp);
 		}
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= acked_pcount;
@@ -3165,17 +3171,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
-	if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
-		seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
-		ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
+	if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
+		seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
+		ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
 	}
-	if (sack->first_sackt.v64) {
-		sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
-		ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
+	if (sack->first_sackt) {
+		sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
+		ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
 	}
-	sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
 	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
-					ca_rtt_us);
+					ca_rtt_us, sack->rate);
 
 	if (flag & FLAG_ACKED) {
 		tcp_rearm_rto(sk);
@@ -3201,7 +3206,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
-		   sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
+		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
 		 * after when the head was last (re)transmitted. Otherwise the
 		 * timeout may continue to extend in loss recovery.
@@ -3211,7 +3216,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 	if (icsk->icsk_ca_ops->pkts_acked) {
 		struct ack_sample sample = { .pkts_acked = pkts_acked,
-					     .rtt_us = ca_rtt_us,
+					     .rtt_us = sack->rate->rtt_us,
 					     .in_flight = last_in_flight };
 
 		icsk->icsk_ca_ops->pkts_acked(sk, &sample);
@@ -3390,7 +3395,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
 				   u32 *last_oow_ack_time)
 {
 	if (*last_oow_ack_time) {
-		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+		s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
 
 		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
 			NET_INC_STATS(net, mib_idx);
@@ -3398,7 +3403,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
 		}
 	}
 
-	*last_oow_ack_time = tcp_time_stamp;
+	*last_oow_ack_time = tcp_jiffies32;
 
 	return false;	/* not rate-limited: go ahead, send dupack now! */
 }
@@ -3553,7 +3558,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int acked = 0; /* Number of packets newly acked */
 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
 
-	sack_state.first_sackt.v64 = 0;
+	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
 
 	/* We very likely will need to access write queue head. */
@@ -3565,7 +3570,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (before(ack, prior_snd_una)) {
 		/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
 		if (before(ack, prior_snd_una - tp->max_window)) {
-			tcp_send_challenge_ack(sk, skb);
+			if (!(flag & FLAG_NO_CHALLENGE_ACK))
+				tcp_send_challenge_ack(sk, skb);
 			return -1;
 		}
 		goto old_ack;
@@ -3636,7 +3642,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	 */
 	sk->sk_err_soft = 0;
 	icsk->icsk_probes_out = 0;
-	tp->rcv_tstamp = tcp_time_stamp;
+	tp->rcv_tstamp = tcp_jiffies32;
 	if (!prior_packets)
 		goto no_queue;
 
@@ -5019,7 +5025,7 @@ static void tcp_new_space(struct sock *sk)
 
 	if (tcp_should_expand_sndbuf(sk)) {
 		tcp_sndbuf_expand(sk);
-		tp->snd_cwnd_stamp = tcp_time_stamp;
+		tp->snd_cwnd_stamp = tcp_jiffies32;
 	}
 
 	sk->sk_write_space(sk);
@@ -5356,7 +5362,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	skb_mstamp_get(&tp->tcp_mstamp);
+	tcp_mstamp_refresh(tp);
 	if (unlikely(!sk->sk_rx_dst))
 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
 	/*
@@ -5554,7 +5560,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tcp_set_state(sk, TCP_ESTABLISHED);
-	icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+	icsk->icsk_ack.lrcvtime = tcp_jiffies32;
 
 	if (skb) {
 		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -5571,7 +5577,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	/* Prevent spurious tcp_cwnd_restart() on first data
 	 * packet.
 	 */
-	tp->lsndtime = tcp_time_stamp;
+	tp->lsndtime = tcp_jiffies32;
 
 	tcp_init_buffer_space(sk);
 
@@ -5672,7 +5678,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
-			     tcp_time_stamp)) {
+			     tcp_time_stamp(tp))) {
 			NET_INC_STATS(sock_net(sk),
 					LINUX_MIB_PAWSACTIVEREJECTED);
 			goto reset_and_undo;
@@ -5917,7 +5923,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 
 	case TCP_SYN_SENT:
 		tp->rx_opt.saw_tstamp = 0;
-		skb_mstamp_get(&tp->tcp_mstamp);
+		tcp_mstamp_refresh(tp);
 		queued = tcp_rcv_synsent_state_process(sk, skb, th);
 		if (queued >= 0)
 			return queued;
@@ -5929,7 +5935,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		return 0;
 	}
 
-	skb_mstamp_get(&tp->tcp_mstamp);
+	tcp_mstamp_refresh(tp);
 	tp->rx_opt.saw_tstamp = 0;
 	req = tp->fastopen_rsk;
 	if (req) {
@@ -5948,13 +5954,17 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 
 	/* step 5: check the ACK field */
 	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
-				      FLAG_UPDATE_TS_RECENT) > 0;
+				      FLAG_UPDATE_TS_RECENT |
+				      FLAG_NO_CHALLENGE_ACK) > 0;
 
+	if (!acceptable) {
+		if (sk->sk_state == TCP_SYN_RECV)
+			return 1;	/* send one RST */
+		tcp_send_challenge_ack(sk, skb);
+		goto discard;
+	}
 	switch (sk->sk_state) {
 	case TCP_SYN_RECV:
-		if (!acceptable)
-			return 1;
-
 		if (!tp->srtt_us)
 			tcp_synack_rtt_meas(sk, req);
 
@@ -6008,7 +6018,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			tcp_update_pacing_rate(sk);
 
 		/* Prevent spurious tcp_cwnd_restart() on first data packet */
-		tp->lsndtime = tcp_time_stamp;
+		tp->lsndtime = tcp_jiffies32;
 
 		tcp_initialize_rcv_mss(sk);
 		tcp_fast_path_on(tp);
@@ -6023,14 +6033,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		 * our SYNACK so stop the SYNACK timer.
 		 */
 		if (req) {
-			/* Return RST if ack_seq is invalid.
-			 * Note that RFC793 only says to generate a
-			 * DUPACK for it but for TCP Fast Open it seems
-			 * better to treat this case like TCP_SYN_RECV
-			 * above.
-			 */
-			if (!acceptable)
-				return 1;
 			/* We no longer need the request sock. */
 			reqsk_fastopen_remove(sk, req, false);
 			tcp_rearm_rto(sk);
@@ -6202,7 +6204,7 @@ static void tcp_openreq_init(struct request_sock *req,
 	req->cookie_ts = 0;
 	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
 	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-	skb_mstamp_get(&tcp_rsk(req)->snt_synack);
+	tcp_rsk(req)->snt_synack = tcp_clock_us();
 	tcp_rsk(req)->last_oow_ack_time = 0;
 	req->mss = rx_opt->mss_clamp;
 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5ab2aac5ca19..191b2f78b19d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -376,8 +376,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	struct sock *sk;
 	struct sk_buff *skb;
 	struct request_sock *fastopen;
-	__u32 seq, snd_una;
-	__u32 remaining;
+	u32 seq, snd_una;
+	s32 remaining;
+	u32 delta_us;
 	int err;
 	struct net *net = dev_net(icmp_skb->dev);
 
@@ -483,11 +484,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		skb = tcp_write_queue_head(sk);
 		BUG_ON(!skb);
 
+		tcp_mstamp_refresh(tp);
+		delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
 		remaining = icsk->icsk_rto -
-			    min(icsk->icsk_rto,
-				tcp_time_stamp - tcp_skb_timestamp(skb));
+			    usecs_to_jiffies(delta_us);
 
-		if (remaining) {
+		if (remaining > 0) {
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  remaining, TCP_RTO_MAX);
 		} else {
@@ -811,7 +813,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	tcp_v4_send_ack(sk, skb,
 			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-			tcp_time_stamp + tcptw->tw_ts_offset,
+			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent,
 			tw->tw_bound_dev_if,
 			tcp_twsk_md5_key(tcptw),
@@ -839,7 +841,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 	tcp_v4_send_ack(sk, skb, seq,
 			tcp_rsk(req)->rcv_nxt,
 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
-			tcp_time_stamp + tcp_rsk(req)->ts_off,
+			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
 			req->ts_recent,
 			0,
 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index d6fb6c067af4..ae10ed64fe13 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -37,7 +37,7 @@
 #include <net/tcp.h>
 
 /* resolution of owd */
-#define LP_RESOL       1000
+#define LP_RESOL       TCP_TS_HZ
 
 /**
  * enum tcp_lp_state
@@ -147,9 +147,9 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
 	    tp->rx_opt.rcv_tsecr == lp->local_ref_time)
 		goto out;
 
-	m = HZ * (tp->rx_opt.rcv_tsval -
-		  lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr -
-					  lp->local_ref_time);
+	m = TCP_TS_HZ *
+	    (tp->rx_opt.rcv_tsval - lp->remote_ref_time) /
+	    (tp->rx_opt.rcv_tsecr - lp->local_ref_time);
 	if (m < 0)
 		m = -m;
 
@@ -194,7 +194,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
 	if (lp->flag & LP_VALID_RHZ) {
 		owd =
 		    tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) -
-		    tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ);
+		    tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ);
 		if (owd < 0)
 			owd = -owd;
 	}
@@ -264,18 +264,19 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct lp *lp = inet_csk_ca(sk);
+	u32 now = tcp_time_stamp(tp);
 	u32 delta;
 
 	if (sample->rtt_us > 0)
 		tcp_lp_rtt_sample(sk, sample->rtt_us);
 
 	/* calc inference */
-	delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+	delta = now - tp->rx_opt.rcv_tsecr;
 	if ((s32)delta > 0)
 		lp->inference = 3 * delta;
 
 	/* test if within inference */
-	if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference))
+	if (lp->last_drop && (now - lp->last_drop < lp->inference))
 		lp->flag |= LP_WITHIN_INF;
 	else
 		lp->flag &= ~LP_WITHIN_INF;
@@ -312,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
 		tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
 
 	/* record this drop time */
-	lp->last_drop = tcp_time_stamp;
+	lp->last_drop = now;
 }
 
 static struct tcp_congestion_ops tcp_lp __read_mostly = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 653bbd67e3a3..102b2c90bb80 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -524,7 +524,7 @@ reset:
 		tp->snd_cwnd = 1;
 	else
 		tp->snd_cwnd = tcp_init_cwnd(tp, dst);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->snd_cwnd_stamp = tcp_jiffies32;
 }
 
 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 717be4de5324..d0642df73044 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -445,9 +445,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
 		newtp->srtt_us = 0;
 		newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
-		minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
+		minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
 		newicsk->icsk_rto = TCP_TIMEOUT_INIT;
-		newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
+		newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
 
 		newtp->packets_out = 0;
 		newtp->retrans_out = 0;
@@ -455,7 +455,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->fackets_out = 0;
 		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 		newtp->tlp_high_seq = 0;
-		newtp->lsndtime = treq->snt_synack.stamp_jiffies;
+		newtp->lsndtime = tcp_jiffies32;
 		newsk->sk_txhash = treq->txhash;
 		newtp->last_oow_ack_time = 0;
 		newtp->total_retrans = req->num_retrans;
@@ -526,7 +526,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->fastopen_req = NULL;
 		newtp->fastopen_rsk = NULL;
 		newtp->syn_data_acked = 0;
-		newtp->rack.mstamp.v64 = 0;
+		newtp->rack.mstamp = 0;
 		newtp->rack.advanced = 0;
 
 		__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 5de82a8d4d87..6d650ed3cb59 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -424,8 +424,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
 }
 
 /* Extract info for Tcp socket info provided via netlink */
-size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
-		      union tcp_cc_info *info)
+static size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
+			     union tcp_cc_info *info)
 {
 	const struct tcpnv *ca = inet_csk_ca(sk);
 
@@ -440,7 +440,6 @@ size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
 	}
 	return 0;
 }
-EXPORT_SYMBOL_GPL(tcpnv_get_info);
 
 static struct tcp_congestion_ops tcpnv __read_mostly = {
 	.init		= tcpnv_init,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4858e190f6ac..e3aab1c1cf78 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -151,7 +151,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
 	while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
 		cwnd >>= 1;
 	tp->snd_cwnd = max(cwnd, restart_cwnd);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->snd_cwnd_stamp = tcp_jiffies32;
 	tp->snd_cwnd_used = 0;
 }
 
@@ -160,7 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 				struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	const u32 now = tcp_time_stamp;
+	const u32 now = tcp_jiffies32;
 
 	if (tcp_packets_in_flight(tp) == 0)
 		tcp_ca_event(sk, CA_EVENT_TX_START);
@@ -904,6 +904,72 @@ out:
 	sk_free(sk);
 }
 
+/* Note: Called under hard irq.
+ * We can not call TCP stack right away.
+ */
+enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
+{
+	struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
+	struct sock *sk = (struct sock *)tp;
+	unsigned long nval, oval;
+
+	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
+		struct tsq_tasklet *tsq;
+		bool empty;
+
+		if (oval & TSQF_QUEUED)
+			break;
+
+		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
+		if (nval != oval)
+			continue;
+
+		if (!atomic_inc_not_zero(&sk->sk_wmem_alloc))
+			break;
+		/* queue this socket to tasklet queue */
+		tsq = this_cpu_ptr(&tsq_tasklet);
+		empty = list_empty(&tsq->head);
+		list_add(&tp->tsq_node, &tsq->head);
+		if (empty)
+			tasklet_schedule(&tsq->tasklet);
+		break;
+	}
+	return HRTIMER_NORESTART;
+}
+
+/* BBR congestion control needs pacing.
+ * Same remark for SO_MAX_PACING_RATE.
+ * sch_fq packet scheduler is efficiently handling pacing,
+ * but is not always installed/used.
+ * Return true if TCP stack should pace packets itself.
+ */
+static bool tcp_needs_internal_pacing(const struct sock *sk)
+{
+	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
+}
+
+static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
+{
+	u64 len_ns;
+	u32 rate;
+
+	if (!tcp_needs_internal_pacing(sk))
+		return;
+	rate = sk->sk_pacing_rate;
+	if (!rate || rate == ~0U)
+		return;
+
+	/* Should account for header sizes as sch_fq does,
+	 * but lets make things simple.
+	 */
+	len_ns = (u64)skb->len * NSEC_PER_SEC;
+	do_div(len_ns, rate);
+	hrtimer_start(&tcp_sk(sk)->pacing_timer,
+		      ktime_add_ns(ktime_get(), len_ns),
+		      HRTIMER_MODE_ABS_PINNED);
+}
+
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
  * transmission and possible later retransmissions.
@@ -931,8 +997,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 	tp = tcp_sk(sk);
 
+	skb->skb_mstamp = tp->tcp_mstamp;
 	if (clone_it) {
-		skb_mstamp_get(&skb->skb_mstamp);
 		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
 			- tp->snd_una;
 		tcp_rate_skb_sent(sk, skb);
@@ -1034,6 +1100,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (skb->len != tcp_header_size) {
 		tcp_event_data_sent(tp, sk);
 		tp->data_segs_out += tcp_skb_pcount(skb);
+		tcp_internal_pacing(sk, skb);
 	}
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
@@ -1261,9 +1328,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	return 0;
 }
 
-/* This is similar to __pskb_pull_head() (it will go to core/skbuff.c
- * eventually). The difference is that pulled data not copied, but
- * immediately discarded.
+/* This is similar to __pskb_pull_tail(). The difference is that pulled
+ * data is not copied, but immediately discarded.
  */
 static int __pskb_trim_head(struct sk_buff *skb, int len)
 {
@@ -1298,7 +1364,6 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
 	}
 	shinfo->nr_frags = k;
 
-	skb_reset_tail_pointer(skb);
 	skb->data_len -= len;
 	skb->len = skb->data_len;
 	return len;
@@ -1408,7 +1473,7 @@ void tcp_mtup_init(struct sock *sk)
 	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
 	icsk->icsk_mtup.probe_size = 0;
 	if (icsk->icsk_mtup.enabled)
-		icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+		icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
 
@@ -1509,7 +1574,7 @@ static void tcp_cwnd_application_limited(struct sock *sk)
 		}
 		tp->snd_cwnd_used = 0;
 	}
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	tp->snd_cwnd_stamp = tcp_jiffies32;
 }
 
 static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
@@ -1530,14 +1595,14 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 	if (tcp_is_cwnd_limited(sk)) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
+		tp->snd_cwnd_stamp = tcp_jiffies32;
 	} else {
 		/* Network starves. */
 		if (tp->packets_out > tp->snd_cwnd_used)
 			tp->snd_cwnd_used = tp->packets_out;
 
 		if (sysctl_tcp_slow_start_after_idle &&
-		    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
+		    (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
 		    !ca_ops->cong_control)
 			tcp_cwnd_application_limited(sk);
 
@@ -1839,7 +1904,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 age, send_win, cong_win, limit, in_flight;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct skb_mstamp now;
 	struct sk_buff *head;
 	int win_divisor;
 
@@ -1852,7 +1916,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	/* Avoid bursty behavior by allowing defer
 	 * only if the last write was recent.
 	 */
-	if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
+	if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0)
 		goto send_now;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1895,8 +1959,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	}
 
 	head = tcp_write_queue_head(sk);
-	skb_mstamp_get(&now);
-	age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
+
+	age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
 	/* If next ACK is likely to come too late (half srtt), do not defer */
 	if (age < (tp->srtt_us >> 4))
 		goto send_now;
@@ -1921,7 +1985,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
 	s32 delta;
 
 	interval = net->ipv4.sysctl_tcp_probe_interval;
-	delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+	delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
 	if (unlikely(delta >= interval * HZ)) {
 		int mss = tcp_current_mss(sk);
 
@@ -1933,7 +1997,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
 		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
 
 		/* Update probe time stamp */
-		icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+		icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
 	}
 }
 
@@ -2086,6 +2150,12 @@ static int tcp_mtu_probe(struct sock *sk)
 	return -1;
 }
 
+static bool tcp_pacing_check(const struct sock *sk)
+{
+	return tcp_needs_internal_pacing(sk) &&
+	       hrtimer_active(&tcp_sk(sk)->pacing_timer);
+}
+
 /* TCP Small Queues :
  * Control number of packets in qdisc/devices to two packets / or ~1 ms.
  * (These limits are doubled for retransmits)
@@ -2130,7 +2200,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 
 static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
 {
-	const u32 now = tcp_time_stamp;
+	const u32 now = tcp_jiffies32;
 
 	if (tp->chrono_type > TCP_CHRONO_UNSPEC)
 		tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
@@ -2207,15 +2277,19 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	}
 
 	max_segs = tcp_tso_segs(sk, mss_now);
+	tcp_mstamp_refresh(tp);
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
+		if (tcp_pacing_check(sk))
+			break;
+
 		tso_segs = tcp_init_tso_segs(skb, mss_now);
 		BUG_ON(!tso_segs);
 
 		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
 			/* "skb_mstamp" is used as a start point for the retransmit timer */
-			skb_mstamp_get(&skb->skb_mstamp);
+			skb->skb_mstamp = tp->tcp_mstamp;
 			goto repair; /* Skip network transmission */
 		}
 
@@ -2342,10 +2416,10 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
 
 	/* If RTO is shorter, just schedule TLP in its place. */
-	tlp_time_stamp = tcp_time_stamp + timeout;
+	tlp_time_stamp = tcp_jiffies32 + timeout;
 	rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
 	if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
-		s32 delta = rto_time_stamp - tcp_time_stamp;
+		s32 delta = rto_time_stamp - tcp_jiffies32;
 		if (delta > 0)
 			timeout = delta;
 	}
@@ -2803,7 +2877,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		     skb_headroom(skb) >= 0xFFFF)) {
 		struct sk_buff *nskb;
 
-		skb_mstamp_get(&skb->skb_mstamp);
+		skb->skb_mstamp = tp->tcp_mstamp;
 		nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
 		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
 			     -ENOBUFS;
@@ -2878,6 +2952,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
 		if (skb == tcp_send_head(sk))
 			break;
+
+		if (tcp_pacing_check(sk))
+			break;
+
 		/* we could do better than to assign each time */
 		if (!hole)
 			tp->retransmit_skb_hint = skb;
@@ -3015,7 +3093,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	skb_reserve(skb, MAX_TCP_HEADER);
 	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
 			     TCPHDR_ACK | TCPHDR_RST);
-	skb_mstamp_get(&skb->skb_mstamp);
+	tcp_mstamp_refresh(tcp_sk(sk));
 	/* Send it off. */
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
@@ -3111,10 +3189,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
-		skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
+		skb->skb_mstamp = cookie_init_timestamp(req);
 	else
 #endif
-	skb_mstamp_get(&skb->skb_mstamp);
+		skb->skb_mstamp = tcp_clock_us();
 
 #ifdef CONFIG_TCP_MD5SIG
 	rcu_read_lock();
@@ -3244,7 +3322,7 @@ static void tcp_connect_init(struct sock *sk)
 	if (likely(!tp->repair))
 		tp->rcv_nxt = 0;
 	else
-		tp->rcv_tstamp = tcp_time_stamp;
+		tp->rcv_tstamp = tcp_jiffies32;
 	tp->rcv_wup = tp->rcv_nxt;
 	tp->copied_seq = tp->rcv_nxt;
 
@@ -3373,7 +3451,8 @@ int tcp_connect(struct sock *sk)
 		return -ENOBUFS;
 
 	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
-	tp->retrans_stamp = tcp_time_stamp;
+	tcp_mstamp_refresh(tp);
+	tp->retrans_stamp = tcp_time_stamp(tp);
 	tcp_connect_queue_skb(sk, buff);
 	tcp_ecn_send_syn(sk, buff);
 
@@ -3492,7 +3571,6 @@ void tcp_send_ack(struct sock *sk)
 	skb_set_tcp_pure_ack(buff);
 
 	/* Send it off, this clears delayed acks for us. */
-	skb_mstamp_get(&buff->skb_mstamp);
 	tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
 }
 EXPORT_SYMBOL_GPL(tcp_send_ack);
@@ -3526,15 +3604,16 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
 	 * send it.
 	 */
 	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
-	skb_mstamp_get(&skb->skb_mstamp);
 	NET_INC_STATS(sock_net(sk), mib);
 	return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
 }
 
+/* Called from setsockopt( ... TCP_REPAIR ) */
 void tcp_send_window_probe(struct sock *sk)
 {
 	if (sk->sk_state == TCP_ESTABLISHED) {
 		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
+		tcp_mstamp_refresh(tcp_sk(sk));
 		tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
 	}
 }
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index c6a9fa894646..ad99569d4c1e 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -78,7 +78,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 
-	if (!scb->tx.delivered_mstamp.v64)
+	if (!scb->tx.delivered_mstamp)
 		return;
 
 	if (!rs->prior_delivered ||
@@ -89,9 +89,9 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 		rs->is_retrans	     = scb->sacked & TCPCB_RETRANS;
 
 		/* Find the duration of the "send phase" of this window: */
-		rs->interval_us      = skb_mstamp_us_delta(
-						&skb->skb_mstamp,
-						&scb->tx.first_tx_mstamp);
+		rs->interval_us      = tcp_stamp_us_delta(
+						skb->skb_mstamp,
+						scb->tx.first_tx_mstamp);
 
 		/* Record send time of most recently ACKed packet: */
 		tp->first_tx_mstamp  = skb->skb_mstamp;
@@ -101,7 +101,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
 	 * we don't need to reset since it'll be freed soon.
 	 */
 	if (scb->sacked & TCPCB_SACKED_ACKED)
-		scb->tx.delivered_mstamp.v64 = 0;
+		scb->tx.delivered_mstamp = 0;
 }
 
 /* Update the connection delivery information and generate a rate sample. */
@@ -125,7 +125,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 	rs->acked_sacked = delivered;	/* freshly ACKed or SACKed */
 	rs->losses = lost;		/* freshly marked lost */
 	/* Return an invalid sample if no timing information is available. */
-	if (!rs->prior_mstamp.v64) {
+	if (!rs->prior_mstamp) {
 		rs->delivered = -1;
 		rs->interval_us = -1;
 		return;
@@ -138,8 +138,8 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 	 * longer phase.
 	 */
 	snd_us = rs->interval_us;				/* send phase */
-	ack_us = skb_mstamp_us_delta(&tp->tcp_mstamp,
-				     &rs->prior_mstamp); /* ack phase */
+	ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
+				    rs->prior_mstamp); /* ack phase */
 	rs->interval_us = max(snd_us, ack_us);
 
 	/* Normally we expect interval_us >= min-rtt.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 362b8c75bfab..fe9a493d0208 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -17,12 +17,9 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 	}
 }
 
-static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
-				const struct skb_mstamp *t2,
-				u32 seq1, u32 seq2)
+static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
 {
-	return skb_mstamp_after(t1, t2) ||
-	       (t1->v64 == t2->v64 && after(seq1, seq2));
+	return t1 > t2 || (t1 == t2 && after(seq1, seq2));
 }
 
 /* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
@@ -72,14 +69,14 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 		    scb->sacked & TCPCB_SACKED_ACKED)
 			continue;
 
-		if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp,
+		if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
 					tp->rack.end_seq, scb->end_seq)) {
 			/* Step 3 in draft-cheng-tcpm-rack-00.txt:
 			 * A packet is lost if its elapsed time is beyond
 			 * the recent RTT plus the reordering window.
 			 */
-			u32 elapsed = skb_mstamp_us_delta(&tp->tcp_mstamp,
-							  &skb->skb_mstamp);
+			u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
+							 skb->skb_mstamp);
 			s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
 
 			if (remaining < 0) {
@@ -127,16 +124,16 @@ void tcp_rack_mark_lost(struct sock *sk)
  * draft-cheng-tcpm-rack-00.txt
  */
 void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
-		      const struct skb_mstamp *xmit_time)
+		      u64 xmit_time)
 {
 	u32 rtt_us;
 
-	if (tp->rack.mstamp.v64 &&
-	    !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp,
+	if (tp->rack.mstamp &&
+	    !tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
 				 end_seq, tp->rack.end_seq))
 		return;
 
-	rtt_us = skb_mstamp_us_delta(&tp->tcp_mstamp, xmit_time);
+	rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
 	if (sacked & TCPCB_RETRANS) {
 		/* If the sacked packet was retransmitted, it's ambiguous
 		 * whether the retransmission or the original (or the prior
@@ -152,7 +149,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 			return;
 	}
 	tp->rack.rtt_us = rtt_us;
-	tp->rack.mstamp = *xmit_time;
+	tp->rack.mstamp = xmit_time;
 	tp->rack.end_seq = end_seq;
 	tp->rack.advanced = 1;
 }
@@ -166,7 +163,6 @@ void tcp_rack_reo_timeout(struct sock *sk)
 	u32 timeout, prior_inflight;
 
 	prior_inflight = tcp_packets_in_flight(tp);
-	skb_mstamp_get(&tp->tcp_mstamp);
 	tcp_rack_detect_loss(sk, &timeout);
 	if (prior_inflight != tcp_packets_in_flight(tp)) {
 		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 14672543cf0b..c0feeeef962a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -63,7 +63,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
 
 	/* If peer does not open window for long time, or did not transmit
 	 * anything for long time, penalize it. */
-	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
+	if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
 		shift++;
 
 	/* If some dubious ICMP arrived, penalize even more. */
@@ -73,7 +73,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
 	if (tcp_check_oom(sk, shift)) {
 		/* Catch exceptional cases, when connection requires reset.
 		 *      1. Last segment was sent recently. */
-		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
+		if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
 		    /*  2. Window is closed. */
 		    (!tp->snd_wnd && !tp->packets_out))
 			do_reset = true;
@@ -115,7 +115,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 	if (net->ipv4.sysctl_tcp_mtu_probing) {
 		if (!icsk->icsk_mtup.enabled) {
 			icsk->icsk_mtup.enabled = 1;
-			icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+			icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
 			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		} else {
 			struct net *net = sock_net(sk);
@@ -139,22 +139,18 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
  *  @timeout:  A custom timeout value.
  *             If set to 0 the default timeout is calculated and used.
  *             Using TCP_RTO_MIN and the number of unsuccessful retransmits.
- *  @syn_set:  true if the SYN Bit was set.
  *
  * The default "timeout" value this function can calculate and use
  * is equivalent to the timeout of a TCP Connection
  * after "boundary" unsuccessful, exponentially backed-off
- * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
- * syn_set flag is set.
- *
+ * retransmissions with an initial RTO of TCP_RTO_MIN.
  */
 static bool retransmits_timed_out(struct sock *sk,
 				  unsigned int boundary,
-				  unsigned int timeout,
-				  bool syn_set)
+				  unsigned int timeout)
 {
+	const unsigned int rto_base = TCP_RTO_MIN;
 	unsigned int linear_backoff_thresh, start_ts;
-	unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
 
 	if (!inet_csk(sk)->icsk_retransmits)
 		return false;
@@ -172,7 +168,7 @@ static bool retransmits_timed_out(struct sock *sk,
 			timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
 				(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
 	}
-	return (tcp_time_stamp - start_ts) >= timeout;
+	return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= jiffies_to_msecs(timeout);
 }
 
 /* A write timeout has occurred. Process the after effects. */
@@ -181,8 +177,8 @@ static int tcp_write_timeout(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
+	bool expired, do_reset;
 	int retry_until;
-	bool do_reset, syn_set = false;
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		if (icsk->icsk_retransmits) {
@@ -196,9 +192,9 @@ static int tcp_write_timeout(struct sock *sk)
 			sk_rethink_txhash(sk);
 		}
 		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
-		syn_set = true;
+		expired = icsk->icsk_retransmits >= retry_until;
 	} else {
-		if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) {
+		if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
 			/* Some middle-boxes may black-hole Fast Open _after_
 			 * the handshake. Therefore we conservatively disable
 			 * Fast Open on this path on recurring timeouts after
@@ -224,15 +220,15 @@ static int tcp_write_timeout(struct sock *sk)
 
 			retry_until = tcp_orphan_retries(sk, alive);
 			do_reset = alive ||
-				!retransmits_timed_out(sk, retry_until, 0, 0);
+				!retransmits_timed_out(sk, retry_until, 0);
 
 			if (tcp_out_of_resources(sk, do_reset))
 				return 1;
 		}
+		expired = retransmits_timed_out(sk, retry_until,
+						icsk->icsk_user_timeout);
 	}
-
-	if (retransmits_timed_out(sk, retry_until,
-				  syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
+	if (expired) {
 		/* Has it gone just too far? */
 		tcp_write_err(sk);
 		return 1;
@@ -339,9 +335,10 @@ static void tcp_probe_timer(struct sock *sk)
 	 */
 	start_ts = tcp_skb_timestamp(tcp_send_head(sk));
 	if (!start_ts)
-		skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp);
+		tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
 	else if (icsk->icsk_user_timeout &&
-		 (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
+		 (s32)(tcp_time_stamp(tp) - start_ts) >
+		 jiffies_to_msecs(icsk->icsk_user_timeout))
 		goto abort;
 
 	max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
@@ -451,7 +448,7 @@ void tcp_retransmit_timer(struct sock *sk)
 					    tp->snd_una, tp->snd_nxt);
 		}
 #endif
-		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
+		if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
 			tcp_write_err(sk);
 			goto out;
 		}
@@ -539,7 +536,7 @@ out_reset_timer:
 		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
 	}
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
-	if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0))
+	if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
 		__sk_dst_reset(sk);
 
 out:;
@@ -561,6 +558,7 @@ void tcp_write_timer_handler(struct sock *sk)
 		goto out;
 	}
 
+	tcp_mstamp_refresh(tcp_sk(sk));
 	event = icsk->icsk_pending;
 
 	switch (event) {
@@ -710,4 +708,7 @@ void tcp_init_xmit_timers(struct sock *sk)
 {
 	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
 				  &tcp_keepalive_timer);
+	hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_ABS_PINNED);
+	tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
 }
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 9775453b8d17..bec9cafbe3f9 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -68,7 +68,7 @@ static void tcp_westwood_init(struct sock *sk)
 	w->cumul_ack = 0;
 	w->reset_rtt_min = 1;
 	w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT;
-	w->rtt_win_sx = tcp_time_stamp;
+	w->rtt_win_sx = tcp_jiffies32;
 	w->snd_una = tcp_sk(sk)->snd_una;
 	w->first_ack = 1;
 }
@@ -116,7 +116,7 @@ static void tcp_westwood_pkts_acked(struct sock *sk,
 static void westwood_update_window(struct sock *sk)
 {
 	struct westwood *w = inet_csk_ca(sk);
-	s32 delta = tcp_time_stamp - w->rtt_win_sx;
+	s32 delta = tcp_jiffies32 - w->rtt_win_sx;
 
 	/* Initialize w->snd_una with the first acked sequence number in order
 	 * to fix mismatch between tp->snd_una and w->snd_una for the first
@@ -140,7 +140,7 @@ static void westwood_update_window(struct sock *sk)
 		westwood_filter(w, delta);
 
 		w->bk = 0;
-		w->rtt_win_sx = tcp_time_stamp;
+		w->rtt_win_sx = tcp_jiffies32;
 	}
 }
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1d6219bf2d6b..fdcb7437cc15 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1164,22 +1164,32 @@ out:
 }
 
 /* fully reclaim rmem/fwd memory allocated for skb */
-static void udp_rmem_release(struct sock *sk, int size, int partial)
+static void udp_rmem_release(struct sock *sk, int size, int partial,
+			     bool rx_queue_lock_held)
 {
 	struct udp_sock *up = udp_sk(sk);
+	struct sk_buff_head *sk_queue;
 	int amt;
 
 	if (likely(partial)) {
 		up->forward_deficit += size;
 		size = up->forward_deficit;
 		if (size < (sk->sk_rcvbuf >> 2) &&
-		    !skb_queue_empty(&sk->sk_receive_queue))
+		    !skb_queue_empty(&up->reader_queue))
 			return;
 	} else {
 		size += up->forward_deficit;
 	}
 	up->forward_deficit = 0;
 
+	/* acquire the sk_receive_queue for fwd allocated memory scheduling,
+	 * if the called don't held it already
+	 */
+	sk_queue = &sk->sk_receive_queue;
+	if (!rx_queue_lock_held)
+		spin_lock(&sk_queue->lock);
+
+
 	sk->sk_forward_alloc += size;
 	amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
 	sk->sk_forward_alloc -= amt;
@@ -1188,19 +1198,31 @@ static void udp_rmem_release(struct sock *sk, int size, int partial)
 		__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
 
 	atomic_sub(size, &sk->sk_rmem_alloc);
+
+	/* this can save us from acquiring the rx queue lock on next receive */
+	skb_queue_splice_tail_init(sk_queue, &up->reader_queue);
+
+	if (!rx_queue_lock_held)
+		spin_unlock(&sk_queue->lock);
 }
 
-/* Note: called with sk_receive_queue.lock held.
+/* Note: called with reader_queue.lock held.
  * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
  * This avoids a cache line miss while receive_queue lock is held.
  * Look at __udp_enqueue_schedule_skb() to find where this copy is done.
  */
 void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
 {
-	udp_rmem_release(sk, skb->dev_scratch, 1);
+	udp_rmem_release(sk, skb->dev_scratch, 1, false);
 }
 EXPORT_SYMBOL(udp_skb_destructor);
 
+/* as above, but the caller held the rx queue lock, too */
+static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
+{
+	udp_rmem_release(sk, skb->dev_scratch, 1, true);
+}
+
 /* Idea of busylocks is to let producers grab an extra spinlock
  * to relieve pressure on the receive_queue spinlock shared by consumer.
  * Under flood, this means that only one producer can be in line
@@ -1306,14 +1328,16 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
 void udp_destruct_sock(struct sock *sk)
 {
 	/* reclaim completely the forward allocated memory */
+	struct udp_sock *up = udp_sk(sk);
 	unsigned int total = 0;
 	struct sk_buff *skb;
 
-	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+	skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue);
+	while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) {
 		total += skb->truesize;
 		kfree_skb(skb);
 	}
-	udp_rmem_release(sk, total, 0);
+	udp_rmem_release(sk, total, 0, true);
 
 	inet_sock_destruct(sk);
 }
@@ -1321,6 +1345,7 @@ EXPORT_SYMBOL_GPL(udp_destruct_sock);
 
 int udp_init_sock(struct sock *sk)
 {
+	skb_queue_head_init(&udp_sk(sk)->reader_queue);
 	sk->sk_destruct = udp_destruct_sock;
 	return 0;
 }
@@ -1338,6 +1363,26 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 }
 EXPORT_SYMBOL_GPL(skb_consume_udp);
 
+static struct sk_buff *__first_packet_length(struct sock *sk,
+					     struct sk_buff_head *rcvq,
+					     int *total)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_peek(rcvq)) != NULL &&
+	       udp_lib_checksum_complete(skb)) {
+		__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+				IS_UDPLITE(sk));
+		__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+				IS_UDPLITE(sk));
+		atomic_inc(&sk->sk_drops);
+		__skb_unlink(skb, rcvq);
+		*total += skb->truesize;
+		kfree_skb(skb);
+	}
+	return skb;
+}
+
 /**
  *	first_packet_length	- return length of first packet in receive queue
  *	@sk: socket
@@ -1347,26 +1392,24 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
  */
 static int first_packet_length(struct sock *sk)
 {
-	struct sk_buff_head *rcvq = &sk->sk_receive_queue;
+	struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
+	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
 	struct sk_buff *skb;
 	int total = 0;
 	int res;
 
 	spin_lock_bh(&rcvq->lock);
-	while ((skb = skb_peek(rcvq)) != NULL &&
-		udp_lib_checksum_complete(skb)) {
-		__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
-				IS_UDPLITE(sk));
-		__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
-				IS_UDPLITE(sk));
-		atomic_inc(&sk->sk_drops);
-		__skb_unlink(skb, rcvq);
-		total += skb->truesize;
-		kfree_skb(skb);
+	skb = __first_packet_length(sk, rcvq, &total);
+	if (!skb && !skb_queue_empty(sk_queue)) {
+		spin_lock(&sk_queue->lock);
+		skb_queue_splice_tail_init(sk_queue, rcvq);
+		spin_unlock(&sk_queue->lock);
+
+		skb = __first_packet_length(sk, rcvq, &total);
 	}
 	res = skb ? skb->len : -1;
 	if (total)
-		udp_rmem_release(sk, total, 1);
+		udp_rmem_release(sk, total, 1, false);
 	spin_unlock_bh(&rcvq->lock);
 	return res;
 }
@@ -1400,6 +1443,77 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 }
 EXPORT_SYMBOL(udp_ioctl);
 
+struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
+			       int noblock, int *peeked, int *off, int *err)
+{
+	struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
+	struct sk_buff_head *queue;
+	struct sk_buff *last;
+	long timeo;
+	int error;
+
+	queue = &udp_sk(sk)->reader_queue;
+	flags |= noblock ? MSG_DONTWAIT : 0;
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	do {
+		struct sk_buff *skb;
+
+		error = sock_error(sk);
+		if (error)
+			break;
+
+		error = -EAGAIN;
+		*peeked = 0;
+		do {
+			spin_lock_bh(&queue->lock);
+			skb = __skb_try_recv_from_queue(sk, queue, flags,
+							udp_skb_destructor,
+							peeked, off, err,
+							&last);
+			if (skb) {
+				spin_unlock_bh(&queue->lock);
+				return skb;
+			}
+
+			if (skb_queue_empty(sk_queue)) {
+				spin_unlock_bh(&queue->lock);
+				goto busy_check;
+			}
+
+			/* refill the reader queue and walk it again
+			 * keep both queues locked to avoid re-acquiring
+			 * the sk_receive_queue lock if fwd memory scheduling
+			 * is needed.
+			 */
+			spin_lock(&sk_queue->lock);
+			skb_queue_splice_tail_init(sk_queue, queue);
+
+			skb = __skb_try_recv_from_queue(sk, queue, flags,
+							udp_skb_dtor_locked,
+							peeked, off, err,
+							&last);
+			spin_unlock(&sk_queue->lock);
+			spin_unlock_bh(&queue->lock);
+			if (skb)
+				return skb;
+
+busy_check:
+			if (!sk_can_busy_loop(sk))
+				break;
+
+			sk_busy_loop(sk, flags & MSG_DONTWAIT);
+		} while (!skb_queue_empty(sk_queue));
+
+		/* sk_queue is empty, reader_queue may contain peeked packets */
+	} while (timeo &&
+		 !__skb_wait_for_more_packets(sk, &error, &timeo,
+					      (struct sk_buff *)sk_queue));
+
+	*err = error;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__skb_recv_udp);
+
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
@@ -1490,7 +1604,8 @@ try_again:
 	return err;
 
 csum_copy_err:
-	if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
+	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+				 udp_skb_destructor)) {
 		UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 		UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	}
@@ -2325,6 +2440,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	unsigned int mask = datagram_poll(file, sock, wait);
 	struct sock *sk = sock->sk;
 
+	if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
+		mask |= POLLIN | POLLRDNORM;
+
 	sock_rps_record_flow(sk);
 
 	/* Check for false positives due to checksum errors */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6a4fb1e629fb..25443fd946a8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2280,7 +2280,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		cfg.fc_flags |= RTF_NONEXTHOP;
 #endif
 
-	ip6_route_add(&cfg);
+	ip6_route_add(&cfg, NULL);
 }
 
 
@@ -2335,7 +2335,7 @@ static void addrconf_add_mroute(struct net_device *dev)
 
 	ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
 
-	ip6_route_add(&cfg);
+	ip6_route_add(&cfg, NULL);
 }
 
 static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index dda6035e3b84..755f38271dd5 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -423,7 +423,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
 	sg_init_table(sg, nfrags + sglists);
-	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out_free;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
 		/* Attach seqhi sg right after packet payload */
@@ -606,7 +608,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	ip6h->hop_limit   = 0;
 
 	sg_init_table(sg, nfrags + sglists);
-	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	err = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+	if (unlikely(err < 0))
+		goto out_free;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
 		/* Attach seqhi sg right after packet payload */
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 1fe99ba8066c..2ede4e459c4e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -346,9 +346,11 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi);
 
 	sg_init_table(sg, esp->nfrags);
-	skb_to_sgvec(skb, sg,
-		     (unsigned char *)esph - skb->data,
-		     assoclen + ivlen + esp->clen + alen);
+	err = skb_to_sgvec(skb, sg,
+		           (unsigned char *)esph - skb->data,
+		           assoclen + ivlen + esp->clen + alen);
+	if (unlikely(err < 0))
+		goto error;
 
 	if (!esp->inplace) {
 		int allocsize;
@@ -372,9 +374,11 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 		spin_unlock_bh(&x->lock);
 
 		sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
-		skb_to_sgvec(skb, dsg,
-			     (unsigned char *)esph - skb->data,
-			     assoclen + ivlen + esp->clen + alen);
+		err = skb_to_sgvec(skb, dsg,
+			           (unsigned char *)esph - skb->data,
+			           assoclen + ivlen + esp->clen + alen);
+		if (unlikely(err < 0))
+			goto error;
 	}
 
 	if ((x->props.flags & XFRM_STATE_ESN))
@@ -618,7 +622,9 @@ skip_cow:
 	esp_input_set_header(skb, seqhi);
 
 	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	ret = skb_to_sgvec(skb, sg, 0, skb->len);
+	if (unlikely(ret < 0))
+		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
 
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index 9ea249b9451e..6de3c04b0f30 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -14,6 +14,8 @@
 #include <net/udp.h>
 #include <net/udp_tunnel.h>
 
+#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL)
+
 static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
 			   struct flowi6 *fl6, u8 *protocol, __be16 sport)
 {
@@ -33,8 +35,8 @@ static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
 	*protocol = IPPROTO_UDP;
 }
 
-int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
-		      u8 *protocol, struct flowi6 *fl6)
+static int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+			     u8 *protocol, struct flowi6 *fl6)
 {
 	__be16 sport;
 	int err;
@@ -49,10 +51,9 @@ int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 
 	return 0;
 }
-EXPORT_SYMBOL(fou6_build_header);
 
-int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
-		      u8 *protocol, struct flowi6 *fl6)
+static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+			     u8 *protocol, struct flowi6 *fl6)
 {
 	__be16 sport;
 	int err;
@@ -67,9 +68,6 @@ int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 
 	return 0;
 }
-EXPORT_SYMBOL(gue6_build_header);
-
-#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL)
 
 static const struct ip6_tnl_encap_ops fou_ip6tun_ops = {
 	.encap_hlen = fou_encap_hlen,
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index b3df03e3faa0..0c02a09bc351 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -91,7 +91,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 drop:
 	kfree_skb(skb);
-	return -EINVAL;
+	return err;
 }
 
 static int ila_input(struct sk_buff *skb)
@@ -117,7 +117,8 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
 
 static int ila_build_state(struct nlattr *nla,
 			   unsigned int family, const void *cfg,
-			   struct lwtunnel_state **ts)
+			   struct lwtunnel_state **ts,
+			   struct netlink_ext_ack *extack)
 {
 	struct ila_lwt *ilwt;
 	struct ila_params *p;
@@ -146,7 +147,7 @@ static int ila_build_state(struct nlattr *nla,
 		return -EINVAL;
 	}
 
-	ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, NULL);
+	ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
 	if (ret < 0)
 		return ret;
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d4bf2c68a545..deea901746c8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -473,7 +473,8 @@ out:
 static struct fib6_node *fib6_add_1(struct fib6_node *root,
 				     struct in6_addr *addr, int plen,
 				     int offset, int allow_create,
-				     int replace_required, int sernum)
+				     int replace_required, int sernum,
+				     struct netlink_ext_ack *extack)
 {
 	struct fib6_node *fn, *in, *ln;
 	struct fib6_node *pn = NULL;
@@ -497,6 +498,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
 		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
 			if (!allow_create) {
 				if (replace_required) {
+					NL_SET_ERR_MSG(extack,
+						       "Can not replace route - no match found");
 					pr_warn("Can't replace route, no match found\n");
 					return ERR_PTR(-ENOENT);
 				}
@@ -543,6 +546,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
 		 * That would keep IPv6 consistent with IPv4
 		 */
 		if (replace_required) {
+			NL_SET_ERR_MSG(extack,
+				       "Can not replace route - no match found");
 			pr_warn("Can't replace route, no match found\n");
 			return ERR_PTR(-ENOENT);
 		}
@@ -964,7 +969,8 @@ void fib6_force_start_gc(struct net *net)
  */
 
 int fib6_add(struct fib6_node *root, struct rt6_info *rt,
-	     struct nl_info *info, struct mx6_config *mxc)
+	     struct nl_info *info, struct mx6_config *mxc,
+	     struct netlink_ext_ack *extack)
 {
 	struct fib6_node *fn, *pn = NULL;
 	int err = -ENOMEM;
@@ -987,7 +993,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 	fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
 			offsetof(struct rt6_info, rt6i_dst), allow_create,
-			replace_required, sernum);
+			replace_required, sernum, extack);
 	if (IS_ERR(fn)) {
 		err = PTR_ERR(fn);
 		fn = NULL;
@@ -1028,7 +1034,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
 					rt->rt6i_src.plen,
 					offsetof(struct rt6_info, rt6i_src),
-					allow_create, replace_required, sernum);
+					allow_create, replace_required, sernum,
+					extack);
 
 			if (IS_ERR(sn)) {
 				/* If it is failed, discard just allocated
@@ -1047,7 +1054,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
 					rt->rt6i_src.plen,
 					offsetof(struct rt6_info, rt6i_src),
-					allow_create, replace_required, sernum);
+					allow_create, replace_required, sernum,
+					extack);
 
 			if (IS_ERR(sn)) {
 				err = PTR_ERR(sn);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index eedee5d108d9..f63b18e05c69 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -220,9 +220,6 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
 	__be16 fo;
 	u8 proto;
 
-	if (skb->csum_bad)
-		return false;
-
 	if (skb_csum_unnecessary(skb))
 		return true;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dc61b0b5e64e..9d9b5bbea153 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -938,14 +938,15 @@ EXPORT_SYMBOL(rt6_lookup);
  */
 
 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
-			struct mx6_config *mxc)
+			struct mx6_config *mxc,
+			struct netlink_ext_ack *extack)
 {
 	int err;
 	struct fib6_table *table;
 
 	table = rt->rt6i_table;
 	write_lock_bh(&table->tb6_lock);
-	err = fib6_add(&table->tb6_root, rt, info, mxc);
+	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
 	write_unlock_bh(&table->tb6_lock);
 
 	return err;
@@ -956,7 +957,7 @@ int ip6_ins_rt(struct rt6_info *rt)
 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
 	struct mx6_config mxc = { .mx = NULL, };
 
-	return __ip6_ins_rt(rt, &info, &mxc);
+	return __ip6_ins_rt(rt, &info, &mxc, NULL);
 }
 
 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
@@ -1844,7 +1845,8 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 	return rt;
 }
 
-static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
+static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
+					      struct netlink_ext_ack *extack)
 {
 	struct net *net = cfg->fc_nlinfo.nl_net;
 	struct rt6_info *rt = NULL;
@@ -1855,14 +1857,25 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
 	int err = -EINVAL;
 
 	/* RTF_PCPU is an internal flag; can not be set by userspace */
-	if (cfg->fc_flags & RTF_PCPU)
+	if (cfg->fc_flags & RTF_PCPU) {
+		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
 		goto out;
+	}
 
-	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
+	if (cfg->fc_dst_len > 128) {
+		NL_SET_ERR_MSG(extack, "Invalid prefix length");
+		goto out;
+	}
+	if (cfg->fc_src_len > 128) {
+		NL_SET_ERR_MSG(extack, "Invalid source address length");
 		goto out;
+	}
 #ifndef CONFIG_IPV6_SUBTREES
-	if (cfg->fc_src_len)
+	if (cfg->fc_src_len) {
+		NL_SET_ERR_MSG(extack,
+			       "Specifying source address requires IPV6_SUBTREES to be enabled");
 		goto out;
+	}
 #endif
 	if (cfg->fc_ifindex) {
 		err = -ENODEV;
@@ -1926,7 +1939,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
 
 		err = lwtunnel_build_state(cfg->fc_encap_type,
 					   cfg->fc_encap, AF_INET6, cfg,
-					   &lwtstate);
+					   &lwtstate, extack);
 		if (err)
 			goto out;
 		rt->dst.lwtstate = lwtstate_get(lwtstate);
@@ -2013,9 +2026,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
 		err = -EINVAL;
 		if (ipv6_chk_addr_and_flags(net, gw_addr,
 					    gwa_type & IPV6_ADDR_LINKLOCAL ?
-					    dev : NULL, 0, 0))
+					    dev : NULL, 0, 0)) {
+			NL_SET_ERR_MSG(extack, "Invalid gateway address");
 			goto out;
-
+		}
 		rt->rt6i_gateway = *gw_addr;
 
 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -2031,8 +2045,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
 			   addressing
 			 */
 			if (!(gwa_type & (IPV6_ADDR_UNICAST |
-					  IPV6_ADDR_MAPPED)))
+					  IPV6_ADDR_MAPPED))) {
+				NL_SET_ERR_MSG(extack,
+					       "Invalid gateway address");
 				goto out;
+			}
 
 			if (cfg->fc_table) {
 				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
@@ -2072,8 +2089,14 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
 				goto out;
 		}
 		err = -EINVAL;
-		if (!dev || (dev->flags & IFF_LOOPBACK))
+		if (!dev) {
+			NL_SET_ERR_MSG(extack, "Egress device not specified");
+			goto out;
+		} else if (dev->flags & IFF_LOOPBACK) {
+			NL_SET_ERR_MSG(extack,
+				       "Egress device can not be loopback device for this route");
 			goto out;
+		}
 	}
 
 	err = -ENODEV;
@@ -2082,6 +2105,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
 
 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
+			NL_SET_ERR_MSG(extack, "Invalid source address");
 			err = -EINVAL;
 			goto out;
 		}
@@ -2111,13 +2135,14 @@ out:
 	return ERR_PTR(err);
 }
 
-int ip6_route_add(struct fib6_config *cfg)
+int ip6_route_add(struct fib6_config *cfg,
+		  struct netlink_ext_ack *extack)
 {
 	struct mx6_config mxc = { .mx = NULL, };
 	struct rt6_info *rt;
 	int err;
 
-	rt = ip6_route_info_create(cfg);
+	rt = ip6_route_info_create(cfg, extack);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		rt = NULL;
@@ -2128,7 +2153,7 @@ int ip6_route_add(struct fib6_config *cfg)
 	if (err)
 		goto out;
 
-	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
+	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
 
 	kfree(mxc.mx);
 
@@ -2222,7 +2247,8 @@ out_put:
 	return err;
 }
 
-static int ip6_route_del(struct fib6_config *cfg)
+static int ip6_route_del(struct fib6_config *cfg,
+			 struct netlink_ext_ack *extack)
 {
 	struct fib6_table *table;
 	struct fib6_node *fn;
@@ -2230,8 +2256,10 @@ static int ip6_route_del(struct fib6_config *cfg)
 	int err = -ESRCH;
 
 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
-	if (!table)
+	if (!table) {
+		NL_SET_ERR_MSG(extack, "FIB table does not exist");
 		return err;
+	}
 
 	read_lock_bh(&table->tb6_lock);
 
@@ -2483,7 +2511,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 	if (!prefixlen)
 		cfg.fc_flags |= RTF_DEFAULT;
 
-	ip6_route_add(&cfg);
+	ip6_route_add(&cfg, NULL);
 
 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
 }
@@ -2529,7 +2557,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 
 	cfg.fc_gateway = *gwaddr;
 
-	if (!ip6_route_add(&cfg)) {
+	if (!ip6_route_add(&cfg, NULL)) {
 		struct fib6_table *table;
 
 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
@@ -2622,10 +2650,10 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&cfg);
+			err = ip6_route_add(&cfg, NULL);
 			break;
 		case SIOCDELRT:
-			err = ip6_route_del(&cfg);
+			err = ip6_route_del(&cfg, NULL);
 			break;
 		default:
 			err = -EINVAL;
@@ -2903,7 +2931,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
-			      struct fib6_config *cfg)
+			      struct fib6_config *cfg,
+			      struct netlink_ext_ack *extack)
 {
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
@@ -2987,7 +3016,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
 
 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
-						     cfg->fc_mp_len);
+						     cfg->fc_mp_len, extack);
 		if (err < 0)
 			goto errout;
 	}
@@ -3006,7 +3035,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (tb[RTA_ENCAP_TYPE]) {
 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
 
-		err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
+		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
 		if (err < 0)
 			goto errout;
 	}
@@ -3097,7 +3126,8 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
 }
 
-static int ip6_route_multipath_add(struct fib6_config *cfg)
+static int ip6_route_multipath_add(struct fib6_config *cfg,
+				   struct netlink_ext_ack *extack)
 {
 	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
 	struct nl_info *info = &cfg->fc_nlinfo;
@@ -3145,7 +3175,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
 				r_cfg.fc_encap_type = nla_get_u16(nla);
 		}
 
-		rt = ip6_route_info_create(&r_cfg);
+		rt = ip6_route_info_create(&r_cfg, extack);
 		if (IS_ERR(rt)) {
 			err = PTR_ERR(rt);
 			rt = NULL;
@@ -3170,7 +3200,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
 	err_nh = NULL;
 	list_for_each_entry(nh, &rt6_nh_list, next) {
 		rt_last = nh->rt6_info;
-		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc);
+		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
 		/* save reference to first route for notification */
 		if (!rt_notif && !err)
 			rt_notif = nh->rt6_info;
@@ -3212,7 +3242,7 @@ add_errout:
 	list_for_each_entry(nh, &rt6_nh_list, next) {
 		if (err_nh == nh)
 			break;
-		ip6_route_del(&nh->r_cfg);
+		ip6_route_del(&nh->r_cfg, extack);
 	}
 
 cleanup:
@@ -3227,7 +3257,8 @@ cleanup:
 	return err;
 }
 
-static int ip6_route_multipath_del(struct fib6_config *cfg)
+static int ip6_route_multipath_del(struct fib6_config *cfg,
+				   struct netlink_ext_ack *extack)
 {
 	struct fib6_config r_cfg;
 	struct rtnexthop *rtnh;
@@ -3254,7 +3285,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg)
 				r_cfg.fc_flags |= RTF_GATEWAY;
 			}
 		}
-		err = ip6_route_del(&r_cfg);
+		err = ip6_route_del(&r_cfg, extack);
 		if (err)
 			last_err = err;
 
@@ -3270,15 +3301,15 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct fib6_config cfg;
 	int err;
 
-	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
 	if (err < 0)
 		return err;
 
 	if (cfg.fc_mp)
-		return ip6_route_multipath_del(&cfg);
+		return ip6_route_multipath_del(&cfg, extack);
 	else {
 		cfg.fc_delete_all_nh = 1;
-		return ip6_route_del(&cfg);
+		return ip6_route_del(&cfg, extack);
 	}
 }
 
@@ -3288,14 +3319,14 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct fib6_config cfg;
 	int err;
 
-	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
 	if (err < 0)
 		return err;
 
 	if (cfg.fc_mp)
-		return ip6_route_multipath_add(&cfg);
+		return ip6_route_multipath_add(&cfg, extack);
 	else
-		return ip6_route_add(&cfg);
+		return ip6_route_add(&cfg, extack);
 }
 
 static size_t rt6_nlmsg_size(struct rt6_info *rt)
@@ -3576,11 +3607,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct nlattr *tb[RTA_MAX+1];
+	int err, iif = 0, oif = 0;
+	struct dst_entry *dst;
 	struct rt6_info *rt;
 	struct sk_buff *skb;
 	struct rtmsg *rtm;
 	struct flowi6 fl6;
-	int err, iif = 0, oif = 0;
+	bool fibmatch;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
 			  extack);
@@ -3591,6 +3624,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	memset(&fl6, 0, sizeof(fl6));
 	rtm = nlmsg_data(nlh);
 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
+	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
 
 	if (tb[RTA_SRC]) {
 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
@@ -3636,12 +3670,23 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		if (!ipv6_addr_any(&fl6.saddr))
 			flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
-							       flags);
+		if (!fibmatch)
+			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
 	} else {
 		fl6.flowi6_oif = oif;
 
-		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
+		if (!fibmatch)
+			dst = ip6_route_output(net, NULL, &fl6);
+	}
+
+	if (fibmatch)
+		dst = ip6_route_lookup(net, &fl6, 0);
+
+	rt = container_of(dst, struct rt6_info, dst);
+	if (rt->dst.error) {
+		err = rt->dst.error;
+		ip6_rt_put(rt);
+		goto errout;
 	}
 
 	if (rt == net->ipv6.ip6_null_entry) {
@@ -3658,10 +3703,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	}
 
 	skb_dst_set(skb, &rt->dst);
-
-	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
-			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
-			    nlh->nlmsg_seq, 0);
+	if (fibmatch)
+		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
+				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
+				    nlh->nlmsg_seq, 0);
+	else
+		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
+				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
+				    nlh->nlmsg_seq, 0);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto errout;
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 5f44ffed2576..15fba55e3da8 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -303,13 +303,9 @@ static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
 static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
-	struct net *net = sock_net(skb->sk);
-	struct seg6_pernet_data *sdata;
 	struct seg6_hmac_info *hinfo;
 	int ret;
 
-	sdata = seg6_pernet(net);
-
 	ret = rhashtable_walk_start(iter);
 	if (ret && ret != -EAGAIN)
 		goto done;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 6a495490d43e..264d772d3c7d 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -326,7 +326,8 @@ drop:
 
 static int seg6_build_state(struct nlattr *nla,
 			    unsigned int family, const void *cfg,
-			    struct lwtunnel_state **ts)
+			    struct lwtunnel_state **ts,
+			    struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
 	struct seg6_iptunnel_encap *tuninfo;
@@ -336,7 +337,7 @@ static int seg6_build_state(struct nlattr *nla,
 	int err;
 
 	err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
-			       seg6_iptunnel_policy, NULL);
+			       seg6_iptunnel_policy, extack);
 
 	if (err < 0)
 		return err;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5abc3692b901..971823359f5b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -211,7 +211,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-	treq->snt_synack.v64	= 0;
+	treq->snt_synack	= 0;
 	treq->rcv_isn = ntohl(th->seq) - 1;
 	treq->snt_isn = cookie;
 	treq->ts_off = 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4f4310a36a04..233edfabe1db 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -949,7 +949,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 
 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-			tcp_time_stamp + tcptw->tw_ts_offset,
+			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
 
@@ -971,7 +971,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
 			tcp_rsk(req)->rcv_nxt,
 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
-			tcp_time_stamp + tcp_rsk(req)->ts_off,
+			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
 			req->ts_recent, sk->sk_bound_dev_if,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
 			0, 0);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 06ec39b79609..2e9b52bded2d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -455,7 +455,8 @@ try_again:
 	return err;
 
 csum_copy_err:
-	if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
+	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+				 udp_skb_destructor)) {
 		if (is_udp4) {
 			UDP_INC_STATS(sock_net(sk),
 				      UDP_MIB_CSUMERRORS, is_udplite);
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index deca20fb2ce2..da49191f7ad0 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1985,7 +1985,7 @@ static int kcm_create(struct net *net, struct socket *sock,
 	return 0;
 }
 
-static struct net_proto_family kcm_family_ops = {
+static const struct net_proto_family kcm_family_ops = {
 	.family = PF_KCM,
 	.create = kcm_create,
 	.owner  = THIS_MODULE,
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7b05fd1497ce..b51582d92740 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -684,6 +684,54 @@ errout:
 	return err;
 }
 
+static int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
+		       u8 via_addr[], struct netlink_ext_ack *extack)
+{
+	struct rtvia *via = nla_data(nla);
+	int err = -EINVAL;
+	int alen;
+
+	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
+		NL_SET_ERR_MSG_ATTR(extack, nla,
+				    "Invalid attribute length for RTA_VIA");
+		goto errout;
+	}
+	alen = nla_len(nla) -
+			offsetof(struct rtvia, rtvia_addr);
+	if (alen > MAX_VIA_ALEN) {
+		NL_SET_ERR_MSG_ATTR(extack, nla,
+				    "Invalid address length for RTA_VIA");
+		goto errout;
+	}
+
+	/* Validate the address family */
+	switch (via->rtvia_family) {
+	case AF_PACKET:
+		*via_table = NEIGH_LINK_TABLE;
+		break;
+	case AF_INET:
+		*via_table = NEIGH_ARP_TABLE;
+		if (alen != 4)
+			goto errout;
+		break;
+	case AF_INET6:
+		*via_table = NEIGH_ND_TABLE;
+		if (alen != 16)
+			goto errout;
+		break;
+	default:
+		/* Unsupported address family */
+		goto errout;
+	}
+
+	memcpy(via_addr, via->rtvia_addr, alen);
+	*via_alen = alen;
+	err = 0;
+
+errout:
+	return err;
+}
+
 static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
 				  struct mpls_route *rt)
 {
@@ -695,8 +743,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
 	if (!nh)
 		return -ENOMEM;
 
-	err = -EINVAL;
-
 	nh->nh_labels = cfg->rc_output_labels;
 	for (i = 0; i < nh->nh_labels; i++)
 		nh->nh_label[i] = cfg->rc_output_label[i];
@@ -720,7 +766,8 @@ errout:
 
 static int mpls_nh_build(struct net *net, struct mpls_route *rt,
 			 struct mpls_nh *nh, int oif, struct nlattr *via,
-			 struct nlattr *newdst, u8 max_labels)
+			 struct nlattr *newdst, u8 max_labels,
+			 struct netlink_ext_ack *extack)
 {
 	int err = -ENOMEM;
 
@@ -728,15 +775,15 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt,
 		goto errout;
 
 	if (newdst) {
-		err = nla_get_labels(newdst, max_labels,
-				     &nh->nh_labels, nh->nh_label);
+		err = nla_get_labels(newdst, max_labels, &nh->nh_labels,
+				     nh->nh_label, extack);
 		if (err)
 			goto errout;
 	}
 
 	if (via) {
 		err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
-				  __mpls_nh_via(rt, nh));
+				  __mpls_nh_via(rt, nh), extack);
 		if (err)
 			goto errout;
 	} else {
@@ -782,7 +829,8 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
 
 		nla = nla_find(attrs, attrlen, RTA_NEWDST);
 		if (nla &&
-		    nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0)
+		    nla_get_labels(nla, MAX_NEW_LABELS, &n_labels,
+				   NULL, NULL) != 0)
 			return 0;
 
 		*max_labels = max_t(u8, *max_labels, n_labels);
@@ -802,7 +850,8 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
 }
 
 static int mpls_nh_build_multi(struct mpls_route_config *cfg,
-			       struct mpls_route *rt, u8 max_labels)
+			       struct mpls_route *rt, u8 max_labels,
+			       struct netlink_ext_ack *extack)
 {
 	struct rtnexthop *rtnh = cfg->rc_mp;
 	struct nlattr *nla_via, *nla_newdst;
@@ -836,7 +885,7 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg,
 
 		err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
 				    rtnh->rtnh_ifindex, nla_via, nla_newdst,
-				    max_labels);
+				    max_labels, extack);
 		if (err)
 			goto errout;
 
@@ -855,7 +904,28 @@ errout:
 	return err;
 }
 
-static int mpls_route_add(struct mpls_route_config *cfg)
+static bool mpls_label_ok(struct net *net, unsigned int index,
+			  struct netlink_ext_ack *extack)
+{
+	/* Reserved labels may not be set */
+	if (index < MPLS_LABEL_FIRST_UNRESERVED) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher");
+		return false;
+	}
+
+	/* The full 20 bit range may not be supported. */
+	if (index >= net->mpls.platform_labels) {
+		NL_SET_ERR_MSG(extack,
+			       "Label >= configured maximum in platform_labels");
+		return false;
+	}
+
+	return true;
+}
+
+static int mpls_route_add(struct mpls_route_config *cfg,
+			  struct netlink_ext_ack *extack)
 {
 	struct mpls_route __rcu **platform_label;
 	struct net *net = cfg->rc_nlinfo.nl_net;
@@ -874,18 +944,15 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 		index = find_free_label(net);
 	}
 
-	/* Reserved labels may not be set */
-	if (index < MPLS_LABEL_FIRST_UNRESERVED)
-		goto errout;
-
-	/* The full 20 bit range may not be supported. */
-	if (index >= net->mpls.platform_labels)
+	if (!mpls_label_ok(net, index, extack))
 		goto errout;
 
 	/* Append makes no sense with mpls */
 	err = -EOPNOTSUPP;
-	if (cfg->rc_nlflags & NLM_F_APPEND)
+	if (cfg->rc_nlflags & NLM_F_APPEND) {
+		NL_SET_ERR_MSG(extack, "MPLS does not support route append");
 		goto errout;
+	}
 
 	err = -EEXIST;
 	platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -912,8 +979,10 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 		nhs = 1;
 	}
 
-	if (nhs == 0)
+	if (nhs == 0) {
+		NL_SET_ERR_MSG(extack, "Route does not contain a nexthop");
 		goto errout;
+	}
 
 	err = -ENOMEM;
 	rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
@@ -927,7 +996,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 	rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
 
 	if (cfg->rc_mp)
-		err = mpls_nh_build_multi(cfg, rt, max_labels);
+		err = mpls_nh_build_multi(cfg, rt, max_labels, extack);
 	else
 		err = mpls_nh_build_from_cfg(cfg, rt);
 	if (err)
@@ -943,7 +1012,8 @@ errout:
 	return err;
 }
 
-static int mpls_route_del(struct mpls_route_config *cfg)
+static int mpls_route_del(struct mpls_route_config *cfg,
+			  struct netlink_ext_ack *extack)
 {
 	struct net *net = cfg->rc_nlinfo.nl_net;
 	unsigned index;
@@ -951,12 +1021,7 @@ static int mpls_route_del(struct mpls_route_config *cfg)
 
 	index = cfg->rc_label;
 
-	/* Reserved labels may not be removed */
-	if (index < MPLS_LABEL_FIRST_UNRESERVED)
-		goto errout;
-
-	/* The full 20 bit range may not be supported */
-	if (index >= net->mpls.platform_labels)
+	if (!mpls_label_ok(net, index, extack))
 		goto errout;
 
 	mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
@@ -1541,8 +1606,8 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
 }
 EXPORT_SYMBOL_GPL(nla_put_labels);
 
-int nla_get_labels(const struct nlattr *nla,
-		   u8 max_labels, u8 *labels, u32 label[])
+int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
+		   u32 label[], struct netlink_ext_ack *extack)
 {
 	unsigned len = nla_len(nla);
 	struct mpls_shim_hdr *nla_label;
@@ -1553,13 +1618,18 @@ int nla_get_labels(const struct nlattr *nla,
 	/* len needs to be an even multiple of 4 (the label size). Number
 	 * of labels is a u8 so check for overflow.
 	 */
-	if (len & 3 || len / 4 > 255)
+	if (len & 3 || len / 4 > 255) {
+		NL_SET_ERR_MSG_ATTR(extack, nla,
+				    "Invalid length for labels attribute");
 		return -EINVAL;
+	}
 
 	/* Limit the number of new labels allowed */
 	nla_labels = len/4;
-	if (nla_labels > max_labels)
+	if (nla_labels > max_labels) {
+		NL_SET_ERR_MSG(extack, "Too many labels");
 		return -EINVAL;
+	}
 
 	/* when label == NULL, caller wants number of labels */
 	if (!label)
@@ -1574,8 +1644,29 @@ int nla_get_labels(const struct nlattr *nla,
 		/* Ensure the bottom of stack flag is properly set
 		 * and ttl and tc are both clear.
 		 */
-		if ((dec.bos != bos) || dec.ttl || dec.tc)
+		if (dec.ttl) {
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "TTL in label must be 0");
+			return -EINVAL;
+		}
+
+		if (dec.tc) {
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "Traffic class in label must be 0");
 			return -EINVAL;
+		}
+
+		if (dec.bos != bos) {
+			NL_SET_BAD_ATTR(extack, nla);
+			if (bos) {
+				NL_SET_ERR_MSG(extack,
+					       "BOS bit must be set in first label");
+			} else {
+				NL_SET_ERR_MSG(extack,
+					       "BOS bit can only be set in first label");
+			}
+			return -EINVAL;
+		}
 
 		switch (dec.label) {
 		case MPLS_LABEL_IMPLNULL:
@@ -1583,6 +1674,8 @@ int nla_get_labels(const struct nlattr *nla,
 			 * assign and distribute, but which never
 			 * actually appears in the encapsulation.
 			 */
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "Implicit NULL Label (3) can not be used in encapsulation");
 			return -EINVAL;
 		}
 
@@ -1594,50 +1687,10 @@ out:
 }
 EXPORT_SYMBOL_GPL(nla_get_labels);
 
-int nla_get_via(const struct nlattr *nla, u8 *via_alen,
-		u8 *via_table, u8 via_addr[])
-{
-	struct rtvia *via = nla_data(nla);
-	int err = -EINVAL;
-	int alen;
-
-	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
-		goto errout;
-	alen = nla_len(nla) -
-			offsetof(struct rtvia, rtvia_addr);
-	if (alen > MAX_VIA_ALEN)
-		goto errout;
-
-	/* Validate the address family */
-	switch (via->rtvia_family) {
-	case AF_PACKET:
-		*via_table = NEIGH_LINK_TABLE;
-		break;
-	case AF_INET:
-		*via_table = NEIGH_ARP_TABLE;
-		if (alen != 4)
-			goto errout;
-		break;
-	case AF_INET6:
-		*via_table = NEIGH_ND_TABLE;
-		if (alen != 16)
-			goto errout;
-		break;
-	default:
-		/* Unsupported address family */
-		goto errout;
-	}
-
-	memcpy(via_addr, via->rtvia_addr, alen);
-	*via_alen = alen;
-	err = 0;
-
-errout:
-	return err;
-}
-
-static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
-			       struct mpls_route_config *cfg)
+static int rtm_to_route_config(struct sk_buff *skb,
+			       struct nlmsghdr *nlh,
+			       struct mpls_route_config *cfg,
+			       struct netlink_ext_ack *extack)
 {
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
@@ -1645,35 +1698,54 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy,
-			  NULL);
+			  extack);
 	if (err < 0)
 		goto errout;
 
 	err = -EINVAL;
 	rtm = nlmsg_data(nlh);
 
-	if (rtm->rtm_family != AF_MPLS)
+	if (rtm->rtm_family != AF_MPLS) {
+		NL_SET_ERR_MSG(extack, "Invalid address family in rtmsg");
 		goto errout;
-	if (rtm->rtm_dst_len != 20)
+	}
+	if (rtm->rtm_dst_len != 20) {
+		NL_SET_ERR_MSG(extack, "rtm_dst_len must be 20 for MPLS");
 		goto errout;
-	if (rtm->rtm_src_len != 0)
+	}
+	if (rtm->rtm_src_len != 0) {
+		NL_SET_ERR_MSG(extack, "rtm_src_len must be 0 for MPLS");
 		goto errout;
-	if (rtm->rtm_tos != 0)
+	}
+	if (rtm->rtm_tos != 0) {
+		NL_SET_ERR_MSG(extack, "rtm_tos must be 0 for MPLS");
 		goto errout;
-	if (rtm->rtm_table != RT_TABLE_MAIN)
+	}
+	if (rtm->rtm_table != RT_TABLE_MAIN) {
+		NL_SET_ERR_MSG(extack,
+			       "MPLS only supports the main route table");
 		goto errout;
+	}
 	/* Any value is acceptable for rtm_protocol */
 
 	/* As mpls uses destination specific addresses
 	 * (or source specific address in the case of multicast)
 	 * all addresses have universal scope.
 	 */
-	if (rtm->rtm_scope != RT_SCOPE_UNIVERSE)
+	if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid route scope  - MPLS only supports UNIVERSE");
 		goto errout;
-	if (rtm->rtm_type != RTN_UNICAST)
+	}
+	if (rtm->rtm_type != RTN_UNICAST) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid route type - MPLS only supports UNICAST");
 		goto errout;
-	if (rtm->rtm_flags != 0)
+	}
+	if (rtm->rtm_flags != 0) {
+		NL_SET_ERR_MSG(extack, "rtm_flags must be 0 for MPLS");
 		goto errout;
+	}
 
 	cfg->rc_label		= LABEL_NOT_SPECIFIED;
 	cfg->rc_protocol	= rtm->rtm_protocol;
@@ -1696,26 +1768,26 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 		case RTA_NEWDST:
 			if (nla_get_labels(nla, MAX_NEW_LABELS,
 					   &cfg->rc_output_labels,
-					   cfg->rc_output_label))
+					   cfg->rc_output_label, extack))
 				goto errout;
 			break;
 		case RTA_DST:
 		{
 			u8 label_count;
 			if (nla_get_labels(nla, 1, &label_count,
-					   &cfg->rc_label))
+					   &cfg->rc_label, extack))
 				goto errout;
 
-			/* Reserved labels may not be set */
-			if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED)
+			if (!mpls_label_ok(cfg->rc_nlinfo.nl_net,
+					   cfg->rc_label, extack))
 				goto errout;
-
 			break;
 		}
 		case RTA_VIA:
 		{
 			if (nla_get_via(nla, &cfg->rc_via_alen,
-					&cfg->rc_via_table, cfg->rc_via))
+					&cfg->rc_via_table, cfg->rc_via,
+					extack))
 				goto errout;
 			break;
 		}
@@ -1729,14 +1801,18 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 		{
 			u8 ttl_propagate = nla_get_u8(nla);
 
-			if (ttl_propagate > 1)
+			if (ttl_propagate > 1) {
+				NL_SET_ERR_MSG_ATTR(extack, nla,
+						    "RTA_TTL_PROPAGATE can only be 0 or 1");
 				goto errout;
+			}
 			cfg->rc_ttl_propagate = ttl_propagate ?
 				MPLS_TTL_PROP_ENABLED :
 				MPLS_TTL_PROP_DISABLED;
 			break;
 		}
 		default:
+			NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute");
 			/* Unsupported attribute */
 			goto errout;
 		}
@@ -1757,11 +1833,11 @@ static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (!cfg)
 		return -ENOMEM;
 
-	err = rtm_to_route_config(skb, nlh, cfg);
+	err = rtm_to_route_config(skb, nlh, cfg, extack);
 	if (err < 0)
 		goto out;
 
-	err = mpls_route_del(cfg);
+	err = mpls_route_del(cfg, extack);
 out:
 	kfree(cfg);
 
@@ -1779,11 +1855,11 @@ static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (!cfg)
 		return -ENOMEM;
 
-	err = rtm_to_route_config(skb, nlh, cfg);
+	err = rtm_to_route_config(skb, nlh, cfg, extack);
 	if (err < 0)
 		goto out;
 
-	err = mpls_route_add(cfg);
+	err = mpls_route_add(cfg, extack);
 out:
 	kfree(cfg);
 
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 4db6a5971322..cf65aec2e551 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -203,9 +203,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
 int nla_put_labels(struct sk_buff *skb, int attrtype,  u8 labels,
 		   const u32 label[]);
 int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
-		   u32 label[]);
-int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
-		u8 via[]);
+		   u32 label[], struct netlink_ext_ack *extack);
 bool mpls_output_possible(const struct net_device *dev);
 unsigned int mpls_dev_mtu(const struct net_device *dev);
 bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 369c7a23c86c..6e558a419f60 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -159,7 +159,8 @@ drop:
 
 static int mpls_build_state(struct nlattr *nla,
 			    unsigned int family, const void *cfg,
-			    struct lwtunnel_state **ts)
+			    struct lwtunnel_state **ts,
+			    struct netlink_ext_ack *extack)
 {
 	struct mpls_iptunnel_encap *tun_encap_info;
 	struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
@@ -168,17 +169,18 @@ static int mpls_build_state(struct nlattr *nla,
 	int ret;
 
 	ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
-			       mpls_iptunnel_policy, NULL);
+			       mpls_iptunnel_policy, extack);
 	if (ret < 0)
 		return ret;
 
-	if (!tb[MPLS_IPTUNNEL_DST])
+	if (!tb[MPLS_IPTUNNEL_DST]) {
+		NL_SET_ERR_MSG(extack, "MPLS_IPTUNNEL_DST attribute is missing");
 		return -EINVAL;
-
+	}
 
 	/* determine number of labels */
-	if (nla_get_labels(tb[MPLS_IPTUNNEL_DST],
-			   MAX_NEW_LABELS, &n_labels, NULL))
+	if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
+			   &n_labels, NULL, extack))
 		return -EINVAL;
 
 	newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
@@ -188,7 +190,8 @@ static int mpls_build_state(struct nlattr *nla,
 
 	tun_encap_info = mpls_lwtunnel_encap(newts);
 	ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels,
-			     &tun_encap_info->labels, tun_encap_info->label);
+			     &tun_encap_info->labels, tun_encap_info->label,
+			     extack);
 	if (ret)
 		goto errout;
 
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index a504e87c6ddf..49bd8bb16b18 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -152,7 +152,7 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
 				    struct synproxy_options *opts)
 {
 	opts->tsecr = opts->tsval;
-	opts->tsval = tcp_time_stamp & ~0x3f;
+	opts->tsval = tcp_time_stamp_raw() & ~0x3f;
 
 	if (opts->options & XT_SYNPROXY_OPT_WSCALE) {
 		opts->tsval |= opts->wscale;
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 54e40fa47822..d3e594eb36d0 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -48,7 +48,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto,
 	return rc;
 }
 
-static struct net_proto_family nfc_sock_family_ops = {
+static const struct net_proto_family nfc_sock_family_ops = {
 	.owner  = THIS_MODULE,
 	.family = PF_NFC,
 	.create = nfc_sock_create,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 7b17da9a94a0..9ddc9f8412a2 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -453,7 +453,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 
 	/* Complete checksum if needed */
 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
-	    (err = skb_checksum_help(skb)))
+	    (err = skb_csum_hwoffload_help(skb, 0)))
 		goto out;
 
 	/* Older versions of OVS user space enforce alignment of the last
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e3eeed19cc7a..82ca49fba336 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -196,8 +196,7 @@ static void *packet_previous_frame(struct packet_sock *po,
 		struct packet_ring_buffer *rb,
 		int status);
 static void packet_increment_head(struct packet_ring_buffer *buff);
-static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
-			struct tpacket_block_desc *);
+static int prb_curr_blk_in_use(struct tpacket_block_desc *);
 static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
 			struct packet_sock *);
 static void prb_retire_current_block(struct tpacket_kbdq_core *,
@@ -721,7 +720,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
 			/* Case 1. Queue was frozen because user-space was
 			 *	   lagging behind.
 			 */
-			if (prb_curr_blk_in_use(pkc, pbd)) {
+			if (prb_curr_blk_in_use(pbd)) {
 				/*
 				 * Ok, user-space is still behind.
 				 * So just refresh the timer.
@@ -972,8 +971,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
 	}
 }
 
-static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
-				      struct tpacket_block_desc *pbd)
+static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
 {
 	return TP_STATUS_USER & BLOCK_STATUS(pbd);
 }
@@ -1064,7 +1062,7 @@ static void *__packet_lookup_frame_in_block(struct packet_sock *po,
 		 * Check if that last block which caused the queue to freeze,
 		 * is still in_use by user-space.
 		 */
-		if (prb_curr_blk_in_use(pkc, pbd)) {
+		if (prb_curr_blk_in_use(pbd)) {
 			/* Can't record this packet */
 			return NULL;
 		} else {
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index b9da4d6b914f..9c68d2f8ba39 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -19,6 +19,7 @@ rxrpc-y := \
 	local_event.o \
 	local_object.o \
 	misc.o \
+	net_ns.o \
 	output.o \
 	peer_event.o \
 	peer_object.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 7fb59c3f1542..0c4dc4a7832c 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -38,9 +38,6 @@ MODULE_PARM_DESC(debug, "RxRPC debugging mask");
 static struct proto rxrpc_proto;
 static const struct proto_ops rxrpc_rpc_ops;
 
-/* local epoch for detecting local-end reset */
-u32 rxrpc_epoch;
-
 /* current debugging ID */
 atomic_t rxrpc_debug_id;
 
@@ -134,9 +131,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
 static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
 {
 	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
-	struct sock *sk = sock->sk;
 	struct rxrpc_local *local;
-	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
 	u16 service_id = srx->srx_service;
 	int ret;
 
@@ -148,31 +144,48 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
 
 	lock_sock(&rx->sk);
 
-	if (rx->sk.sk_state != RXRPC_UNBOUND) {
-		ret = -EINVAL;
-		goto error_unlock;
-	}
-
-	memcpy(&rx->srx, srx, sizeof(rx->srx));
+	switch (rx->sk.sk_state) {
+	case RXRPC_UNBOUND:
+		rx->srx = *srx;
+		local = rxrpc_lookup_local(sock_net(&rx->sk), &rx->srx);
+		if (IS_ERR(local)) {
+			ret = PTR_ERR(local);
+			goto error_unlock;
+		}
 
-	local = rxrpc_lookup_local(&rx->srx);
-	if (IS_ERR(local)) {
-		ret = PTR_ERR(local);
-		goto error_unlock;
-	}
+		if (service_id) {
+			write_lock(&local->services_lock);
+			if (rcu_access_pointer(local->service))
+				goto service_in_use;
+			rx->local = local;
+			rcu_assign_pointer(local->service, rx);
+			write_unlock(&local->services_lock);
+
+			rx->sk.sk_state = RXRPC_SERVER_BOUND;
+		} else {
+			rx->local = local;
+			rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+		}
+		break;
 
-	if (service_id) {
-		write_lock(&local->services_lock);
-		if (rcu_access_pointer(local->service))
-			goto service_in_use;
-		rx->local = local;
-		rcu_assign_pointer(local->service, rx);
-		write_unlock(&local->services_lock);
+	case RXRPC_SERVER_BOUND:
+		ret = -EINVAL;
+		if (service_id == 0)
+			goto error_unlock;
+		ret = -EADDRINUSE;
+		if (service_id == rx->srx.srx_service)
+			goto error_unlock;
+		ret = -EINVAL;
+		srx->srx_service = rx->srx.srx_service;
+		if (memcmp(srx, &rx->srx, sizeof(*srx)) != 0)
+			goto error_unlock;
+		rx->second_service = service_id;
+		rx->sk.sk_state = RXRPC_SERVER_BOUND2;
+		break;
 
-		rx->sk.sk_state = RXRPC_SERVER_BOUND;
-	} else {
-		rx->local = local;
-		rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+	default:
+		ret = -EINVAL;
+		goto error_unlock;
 	}
 
 	release_sock(&rx->sk);
@@ -209,6 +222,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
 		ret = -EADDRNOTAVAIL;
 		break;
 	case RXRPC_SERVER_BOUND:
+	case RXRPC_SERVER_BOUND2:
 		ASSERT(rx->local != NULL);
 		max = READ_ONCE(rxrpc_max_backlog);
 		ret = -EINVAL;
@@ -434,7 +448,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 			ret = -EAFNOSUPPORT;
 			goto error_unlock;
 		}
-		local = rxrpc_lookup_local(&rx->srx);
+		local = rxrpc_lookup_local(sock_net(sock->sk), &rx->srx);
 		if (IS_ERR(local)) {
 			ret = PTR_ERR(local);
 			goto error_unlock;
@@ -476,6 +490,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
 {
 	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
 	unsigned int min_sec_level;
+	u16 service_upgrade[2];
 	int ret;
 
 	_enter(",%d,%d,,%d", level, optname, optlen);
@@ -532,6 +547,28 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
 			rx->min_sec_level = min_sec_level;
 			goto success;
 
+		case RXRPC_UPGRADEABLE_SERVICE:
+			ret = -EINVAL;
+			if (optlen != sizeof(service_upgrade) ||
+			    rx->service_upgrade.from != 0)
+				goto error;
+			ret = -EISCONN;
+			if (rx->sk.sk_state != RXRPC_SERVER_BOUND2)
+				goto error;
+			ret = -EFAULT;
+			if (copy_from_user(service_upgrade, optval,
+					   sizeof(service_upgrade)) != 0)
+				goto error;
+			ret = -EINVAL;
+			if ((service_upgrade[0] != rx->srx.srx_service ||
+			     service_upgrade[1] != rx->second_service) &&
+			    (service_upgrade[0] != rx->second_service ||
+			     service_upgrade[1] != rx->srx.srx_service))
+				goto error;
+			rx->service_upgrade.from = service_upgrade[0];
+			rx->service_upgrade.to = service_upgrade[1];
+			goto success;
+
 		default:
 			break;
 		}
@@ -582,9 +619,6 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
 
 	_enter("%p,%d", sock, protocol);
 
-	if (!net_eq(net, &init_net))
-		return -EAFNOSUPPORT;
-
 	/* we support transport protocol UDP/UDP6 only */
 	if (protocol != PF_INET &&
 	    IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6)
@@ -780,8 +814,6 @@ static int __init af_rxrpc_init(void)
 
 	BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
 
-	get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch));
-	rxrpc_epoch |= RXRPC_RANDOM_EPOCH;
 	get_random_bytes(&tmp, sizeof(tmp));
 	tmp &= 0x3fffffff;
 	if (tmp == 0)
@@ -809,6 +841,10 @@ static int __init af_rxrpc_init(void)
 		goto error_security;
 	}
 
+	ret = register_pernet_subsys(&rxrpc_net_ops);
+	if (ret)
+		goto error_pernet;
+
 	ret = proto_register(&rxrpc_proto, 1);
 	if (ret < 0) {
 		pr_crit("Cannot register protocol\n");
@@ -839,11 +875,6 @@ static int __init af_rxrpc_init(void)
 		goto error_sysctls;
 	}
 
-#ifdef CONFIG_PROC_FS
-	proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops);
-	proc_create("rxrpc_conns", 0, init_net.proc_net,
-		    &rxrpc_connection_seq_fops);
-#endif
 	return 0;
 
 error_sysctls:
@@ -855,6 +886,8 @@ error_key_type:
 error_sock:
 	proto_unregister(&rxrpc_proto);
 error_proto:
+	unregister_pernet_subsys(&rxrpc_net_ops);
+error_pernet:
 	rxrpc_exit_security();
 error_security:
 	destroy_workqueue(rxrpc_workqueue);
@@ -875,14 +908,16 @@ static void __exit af_rxrpc_exit(void)
 	unregister_key_type(&key_type_rxrpc);
 	sock_unregister(PF_RXRPC);
 	proto_unregister(&rxrpc_proto);
-	rxrpc_destroy_all_calls();
-	rxrpc_destroy_all_connections();
+	unregister_pernet_subsys(&rxrpc_net_ops);
 	ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0);
 	ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0);
-	rxrpc_destroy_all_locals();
 
-	remove_proc_entry("rxrpc_conns", init_net.proc_net);
-	remove_proc_entry("rxrpc_calls", init_net.proc_net);
+	/* Make sure the local and peer records pinned by any dying connections
+	 * are released.
+	 */
+	rcu_barrier();
+	rxrpc_destroy_client_conn_ids();
+
 	destroy_workqueue(rxrpc_workqueue);
 	rxrpc_exit_security();
 	kmem_cache_destroy(rxrpc_call_jar);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7486926e60a8..e9b536cb0acf 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -11,6 +11,8 @@
 
 #include <linux/atomic.h>
 #include <linux/seqlock.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <rxrpc/packet.h>
@@ -59,12 +61,44 @@ enum {
 	RXRPC_CLIENT_UNBOUND,		/* Unbound socket used as client */
 	RXRPC_CLIENT_BOUND,		/* client local address bound */
 	RXRPC_SERVER_BOUND,		/* server local address bound */
+	RXRPC_SERVER_BOUND2,		/* second server local address bound */
 	RXRPC_SERVER_LISTENING,		/* server listening for connections */
 	RXRPC_SERVER_LISTEN_DISABLED,	/* server listening disabled */
 	RXRPC_CLOSE,			/* socket is being closed */
 };
 
 /*
+ * Per-network namespace data.
+ */
+struct rxrpc_net {
+	struct proc_dir_entry	*proc_net;	/* Subdir in /proc/net */
+	u32			epoch;		/* Local epoch for detecting local-end reset */
+	struct list_head	calls;		/* List of calls active in this namespace */
+	rwlock_t		call_lock;	/* Lock for ->calls */
+
+	struct list_head	conn_proc_list;	/* List of conns in this namespace for proc */
+	struct list_head	service_conns;	/* Service conns in this namespace */
+	rwlock_t		conn_lock;	/* Lock for ->conn_proc_list, ->service_conns */
+	struct delayed_work	service_conn_reaper;
+
+	unsigned int		nr_client_conns;
+	unsigned int		nr_active_client_conns;
+	bool			kill_all_client_conns;
+	spinlock_t		client_conn_cache_lock; /* Lock for ->*_client_conns */
+	spinlock_t		client_conn_discard_lock; /* Prevent multiple discarders */
+	struct list_head	waiting_client_conns;
+	struct list_head	active_client_conns;
+	struct list_head	idle_client_conns;
+	struct delayed_work	client_conn_reaper;
+
+	struct list_head	local_endpoints;
+	struct mutex		local_mutex;	/* Lock for ->local_endpoints */
+
+	spinlock_t		peer_hash_lock;	/* Lock for ->peer_hash */
+	DECLARE_HASHTABLE	(peer_hash, 10);
+};
+
+/*
  * Service backlog preallocation.
  *
  * This contains circular buffers of preallocated peers, connections and calls
@@ -109,8 +143,14 @@ struct rxrpc_sock {
 	u32			min_sec_level;	/* minimum security level */
 #define RXRPC_SECURITY_MAX	RXRPC_SECURITY_ENCRYPT
 	bool			exclusive;	/* Exclusive connection for a client socket */
+	u16			second_service;	/* Additional service bound to the endpoint */
+	struct {
+		/* Service upgrade information */
+		u16		from;		/* Service ID to upgrade (if not 0) */
+		u16		to;		/* service ID to upgrade to */
+	} service_upgrade;
 	sa_family_t		family;		/* Protocol family created with */
-	struct sockaddr_rxrpc	srx;		/* local address */
+	struct sockaddr_rxrpc	srx;		/* Primary Service/local addresses */
 	struct sockaddr_rxrpc	connect_srx;	/* Default client address from connect() */
 };
 
@@ -211,6 +251,7 @@ struct rxrpc_security {
 struct rxrpc_local {
 	struct rcu_head		rcu;
 	atomic_t		usage;
+	struct rxrpc_net	*rxnet;		/* The network ns in which this resides */
 	struct list_head	link;
 	struct socket		*socket;	/* my UDP socket */
 	struct work_struct	processor;
@@ -279,6 +320,7 @@ struct rxrpc_conn_parameters {
 	struct rxrpc_peer	*peer;		/* Remote endpoint */
 	struct key		*key;		/* Security details */
 	bool			exclusive;	/* T if conn is exclusive */
+	bool			upgrade;	/* T if service ID can be upgraded */
 	u16			service_id;	/* Service ID for this connection */
 	u32			security_level;	/* Security level selected */
 };
@@ -293,6 +335,7 @@ enum rxrpc_conn_flag {
 	RXRPC_CONN_EXPOSED,		/* Conn has extra ref for exposure */
 	RXRPC_CONN_DONT_REUSE,		/* Don't reuse this connection */
 	RXRPC_CONN_COUNTED,		/* Counted by rxrpc_nr_client_conns */
+	RXRPC_CONN_PROBING_FOR_UPGRADE,	/* Probing for service upgrade */
 };
 
 /*
@@ -309,6 +352,7 @@ enum rxrpc_conn_cache_state {
 	RXRPC_CONN_CLIENT_INACTIVE,	/* Conn is not yet listed */
 	RXRPC_CONN_CLIENT_WAITING,	/* Conn is on wait list, waiting for capacity */
 	RXRPC_CONN_CLIENT_ACTIVE,	/* Conn is on active list, doing calls */
+	RXRPC_CONN_CLIENT_UPGRADE,	/* Conn is on active list, probing for upgrade */
 	RXRPC_CONN_CLIENT_CULLED,	/* Conn is culled and delisted, doing calls */
 	RXRPC_CONN_CLIENT_IDLE,		/* Conn is on idle list, doing mostly nothing */
 	RXRPC_CONN__NR_CACHE_STATES
@@ -352,7 +396,6 @@ struct rxrpc_connection {
 		u32			call_counter;	/* Call ID counter */
 		u32			last_call;	/* ID of last call */
 		u8			last_type;	/* Type of last packet */
-		u16			last_service_id;
 		union {
 			u32		last_seq;
 			u32		last_abort;
@@ -383,6 +426,7 @@ struct rxrpc_connection {
 	atomic_t		serial;		/* packet serial number counter */
 	unsigned int		hi_serial;	/* highest serial number received */
 	u32			security_nonce;	/* response re-use preventer */
+	u16			service_id;	/* Service ID, possibly upgraded */
 	u8			size_align;	/* data size alignment (for security) */
 	u8			security_size;	/* security header size */
 	u8			security_ix;	/* security type */
@@ -601,7 +645,6 @@ struct rxrpc_ack_summary {
  * af_rxrpc.c
  */
 extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
-extern u32 rxrpc_epoch;
 extern atomic_t rxrpc_debug_id;
 extern struct workqueue_struct *rxrpc_workqueue;
 
@@ -634,8 +677,6 @@ extern const char *const rxrpc_call_states[];
 extern const char *const rxrpc_call_completions[];
 extern unsigned int rxrpc_max_call_lifetime;
 extern struct kmem_cache *rxrpc_call_jar;
-extern struct list_head rxrpc_calls;
-extern rwlock_t rxrpc_call_lock;
 
 struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
 struct rxrpc_call *rxrpc_alloc_call(gfp_t);
@@ -653,7 +694,7 @@ void rxrpc_see_call(struct rxrpc_call *);
 void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
 void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
 void rxrpc_cleanup_call(struct rxrpc_call *);
-void __exit rxrpc_destroy_all_calls(void);
+void rxrpc_destroy_all_calls(struct rxrpc_net *);
 
 static inline bool rxrpc_is_service_call(const struct rxrpc_call *call)
 {
@@ -773,7 +814,8 @@ int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
 void rxrpc_expose_client_call(struct rxrpc_call *);
 void rxrpc_disconnect_client_call(struct rxrpc_call *);
 void rxrpc_put_client_conn(struct rxrpc_connection *);
-void __exit rxrpc_destroy_all_client_connections(void);
+void rxrpc_discard_expired_client_conns(struct work_struct *);
+void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
 
 /*
  * conn_event.c
@@ -784,9 +826,6 @@ void rxrpc_process_connection(struct work_struct *);
  * conn_object.c
  */
 extern unsigned int rxrpc_connection_expiry;
-extern struct list_head rxrpc_connections;
-extern struct list_head rxrpc_connection_proc_list;
-extern rwlock_t rxrpc_connection_lock;
 
 int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
 struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
@@ -800,7 +839,8 @@ void rxrpc_see_connection(struct rxrpc_connection *);
 void rxrpc_get_connection(struct rxrpc_connection *);
 struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *);
 void rxrpc_put_service_conn(struct rxrpc_connection *);
-void __exit rxrpc_destroy_all_connections(void);
+void rxrpc_service_connection_reaper(struct work_struct *);
+void rxrpc_destroy_all_connections(struct rxrpc_net *);
 
 static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
 {
@@ -828,8 +868,9 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
  */
 struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
 						     struct sk_buff *);
-struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t);
-void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *);
+struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t);
+void rxrpc_new_incoming_connection(struct rxrpc_sock *,
+				   struct rxrpc_connection *, struct sk_buff *);
 void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
 
 /*
@@ -861,9 +902,9 @@ extern void rxrpc_process_local_events(struct rxrpc_local *);
 /*
  * local_object.c
  */
-struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *);
+struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
 void __rxrpc_put_local(struct rxrpc_local *);
-void __exit rxrpc_destroy_all_locals(void);
+void rxrpc_destroy_all_locals(struct rxrpc_net *);
 
 static inline void rxrpc_get_local(struct rxrpc_local *local)
 {
@@ -902,6 +943,17 @@ extern unsigned int rxrpc_resend_timeout;
 extern const s8 rxrpc_ack_priority[];
 
 /*
+ * net_ns.c
+ */
+extern unsigned int rxrpc_net_id;
+extern struct pernet_operations rxrpc_net_ops;
+
+static inline struct rxrpc_net *rxrpc_net(struct net *net)
+{
+	return net_generic(net, rxrpc_net_id);
+}
+
+/*
  * output.c
  */
 int rxrpc_send_ack_packet(struct rxrpc_call *, bool);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 1752fcf8e8f1..0d4d84e8c074 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -38,6 +38,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
 {
 	const void *here = __builtin_return_address(0);
 	struct rxrpc_call *call;
+	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
 	int max, tmp;
 	unsigned int size = RXRPC_BACKLOG_MAX;
 	unsigned int head, tail, call_head, call_tail;
@@ -79,7 +80,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
 	if (CIRC_CNT(head, tail, size) < max) {
 		struct rxrpc_connection *conn;
 
-		conn = rxrpc_prealloc_service_connection(gfp);
+		conn = rxrpc_prealloc_service_connection(rxnet, gfp);
 		if (!conn)
 			return -ENOMEM;
 		b->conn_backlog[head] = conn;
@@ -136,9 +137,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
 
 	write_unlock(&rx->call_lock);
 
-	write_lock(&rxrpc_call_lock);
-	list_add_tail(&call->link, &rxrpc_calls);
-	write_unlock(&rxrpc_call_lock);
+	write_lock(&rxnet->call_lock);
+	list_add_tail(&call->link, &rxnet->calls);
+	write_unlock(&rxnet->call_lock);
 
 	b->call_backlog[call_head] = call;
 	smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
@@ -185,6 +186,7 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp)
 void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
 {
 	struct rxrpc_backlog *b = rx->backlog;
+	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
 	unsigned int size = RXRPC_BACKLOG_MAX, head, tail;
 
 	if (!b)
@@ -209,10 +211,10 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
 	tail = b->conn_backlog_tail;
 	while (CIRC_CNT(head, tail, size) > 0) {
 		struct rxrpc_connection *conn = b->conn_backlog[tail];
-		write_lock(&rxrpc_connection_lock);
+		write_lock(&rxnet->conn_lock);
 		list_del(&conn->link);
 		list_del(&conn->proc_link);
-		write_unlock(&rxrpc_connection_lock);
+		write_unlock(&rxnet->conn_lock);
 		kfree(conn);
 		tail = (tail + 1) & (size - 1);
 	}
@@ -294,7 +296,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
 		conn->params.local = local;
 		conn->params.peer = peer;
 		rxrpc_see_connection(conn);
-		rxrpc_new_incoming_connection(conn, skb);
+		rxrpc_new_incoming_connection(rx, conn, skb);
 	} else {
 		rxrpc_get_connection(conn);
 	}
@@ -339,7 +341,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 
 	/* Get the socket providing the service */
 	rx = rcu_dereference(local->service);
-	if (rx && service_id == rx->srx.srx_service)
+	if (rx && (service_id == rx->srx.srx_service ||
+		   service_id == rx->second_service))
 		goto found_service;
 
 	trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 47f7f4205653..692110808baa 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -44,8 +44,6 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = {
 };
 
 struct kmem_cache *rxrpc_call_jar;
-LIST_HEAD(rxrpc_calls);
-DEFINE_RWLOCK(rxrpc_call_lock);
 
 static void rxrpc_call_timer_expired(unsigned long _call)
 {
@@ -207,6 +205,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 	__releases(&rx->sk.sk_lock.slock)
 {
 	struct rxrpc_call *call, *xcall;
+	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
 	struct rb_node *parent, **pp;
 	const void *here = __builtin_return_address(0);
 	int ret;
@@ -255,9 +254,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 
 	write_unlock(&rx->call_lock);
 
-	write_lock(&rxrpc_call_lock);
-	list_add_tail(&call->link, &rxrpc_calls);
-	write_unlock(&rxrpc_call_lock);
+	write_lock(&rxnet->call_lock);
+	list_add_tail(&call->link, &rxnet->calls);
+	write_unlock(&rxnet->call_lock);
 
 	/* From this point on, the call is protected by its own lock. */
 	release_sock(&rx->sk);
@@ -508,6 +507,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
  */
 void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
 {
+	struct rxrpc_net *rxnet;
 	const void *here = __builtin_return_address(0);
 	int n;
 
@@ -520,9 +520,12 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
 		_debug("call %d dead", call->debug_id);
 		ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
 
-		write_lock(&rxrpc_call_lock);
-		list_del_init(&call->link);
-		write_unlock(&rxrpc_call_lock);
+		if (!list_empty(&call->link)) {
+			rxnet = rxrpc_net(sock_net(&call->socket->sk));
+			write_lock(&rxnet->call_lock);
+			list_del_init(&call->link);
+			write_unlock(&rxnet->call_lock);
+		}
 
 		rxrpc_cleanup_call(call);
 	}
@@ -570,21 +573,23 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
 }
 
 /*
- * Make sure that all calls are gone.
+ * Make sure that all calls are gone from a network namespace.  To reach this
+ * point, any open UDP sockets in that namespace must have been closed, so any
+ * outstanding calls cannot be doing I/O.
  */
-void __exit rxrpc_destroy_all_calls(void)
+void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
 {
 	struct rxrpc_call *call;
 
 	_enter("");
 
-	if (list_empty(&rxrpc_calls))
+	if (list_empty(&rxnet->calls))
 		return;
 
-	write_lock(&rxrpc_call_lock);
+	write_lock(&rxnet->call_lock);
 
-	while (!list_empty(&rxrpc_calls)) {
-		call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
+	while (!list_empty(&rxnet->calls)) {
+		call = list_entry(rxnet->calls.next, struct rxrpc_call, link);
 		_debug("Zapping call %p", call);
 
 		rxrpc_see_call(call);
@@ -595,10 +600,10 @@ void __exit rxrpc_destroy_all_calls(void)
 		       rxrpc_call_states[call->state],
 		       call->flags, call->events);
 
-		write_unlock(&rxrpc_call_lock);
+		write_unlock(&rxnet->call_lock);
 		cond_resched();
-		write_lock(&rxrpc_call_lock);
+		write_lock(&rxnet->call_lock);
 	}
 
-	write_unlock(&rxrpc_call_lock);
+	write_unlock(&rxnet->call_lock);
 }
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index e8dea0d49e7f..dd8bb919c15a 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -31,22 +31,25 @@
  *      may freely grant available channels to new calls and calls may be
  *      waiting on it for channels to become available.
  *
- *	The connection is on the rxrpc_active_client_conns list which is kept
+ *	The connection is on the rxnet->active_client_conns list which is kept
  *	in activation order for culling purposes.
  *
  *	rxrpc_nr_active_client_conns is held incremented also.
  *
- *  (4) CULLED - The connection got summarily culled to try and free up
+ *  (4) UPGRADE - As for ACTIVE, but only one call may be in progress and is
+ *      being used to probe for service upgrade.
+ *
+ *  (5) CULLED - The connection got summarily culled to try and free up
  *      capacity.  Calls currently in progress on the connection are allowed to
  *      continue, but new calls will have to wait.  There can be no waiters in
  *      this state - the conn would have to go to the WAITING state instead.
  *
- *  (5) IDLE - The connection has no calls in progress upon it and must have
+ *  (6) IDLE - The connection has no calls in progress upon it and must have
  *      been exposed to the world (ie. the EXPOSED flag must be set).  When it
  *      expires, the EXPOSED flag is cleared and the connection transitions to
  *      the INACTIVE state.
  *
- *	The connection is on the rxrpc_idle_client_conns list which is kept in
+ *	The connection is on the rxnet->idle_client_conns list which is kept in
  *	order of how soon they'll expire.
  *
  * There are flags of relevance to the cache:
@@ -85,27 +88,13 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900;
 __read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
 __read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
 
-static unsigned int rxrpc_nr_client_conns;
-static unsigned int rxrpc_nr_active_client_conns;
-static __read_mostly bool rxrpc_kill_all_client_conns;
-
-static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock);
-static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex);
-static LIST_HEAD(rxrpc_waiting_client_conns);
-static LIST_HEAD(rxrpc_active_client_conns);
-static LIST_HEAD(rxrpc_idle_client_conns);
-
 /*
  * We use machine-unique IDs for our client connections.
  */
 DEFINE_IDR(rxrpc_client_conn_ids);
 static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
 
-static void rxrpc_cull_active_client_conns(void);
-static void rxrpc_discard_expired_client_conns(struct work_struct *);
-
-static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
-			    rxrpc_discard_expired_client_conns);
+static void rxrpc_cull_active_client_conns(struct rxrpc_net *);
 
 /*
  * Get a connection ID and epoch for a client connection from the global pool.
@@ -116,6 +105,7 @@ static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
 static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
 					  gfp_t gfp)
 {
+	struct rxrpc_net *rxnet = conn->params.local->rxnet;
 	int id;
 
 	_enter("");
@@ -131,7 +121,7 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
 	spin_unlock(&rxrpc_conn_id_lock);
 	idr_preload_end();
 
-	conn->proto.epoch = rxrpc_epoch;
+	conn->proto.epoch = rxnet->epoch;
 	conn->proto.cid = id << RXRPC_CIDSHIFT;
 	set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
 	_leave(" [CID %x]", conn->proto.cid);
@@ -183,6 +173,7 @@ static struct rxrpc_connection *
 rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
 {
 	struct rxrpc_connection *conn;
+	struct rxrpc_net *rxnet = cp->local->rxnet;
 	int ret;
 
 	_enter("");
@@ -196,10 +187,13 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
 	atomic_set(&conn->usage, 1);
 	if (cp->exclusive)
 		__set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+	if (cp->upgrade)
+		__set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
 
 	conn->params		= *cp;
 	conn->out_clientflag	= RXRPC_CLIENT_INITIATED;
 	conn->state		= RXRPC_CONN_CLIENT;
+	conn->service_id	= cp->service_id;
 
 	ret = rxrpc_get_client_connection_id(conn, gfp);
 	if (ret < 0)
@@ -213,9 +207,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
 	if (ret < 0)
 		goto error_2;
 
-	write_lock(&rxrpc_connection_lock);
-	list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
-	write_unlock(&rxrpc_connection_lock);
+	write_lock(&rxnet->conn_lock);
+	list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
+	write_unlock(&rxnet->conn_lock);
 
 	/* We steal the caller's peer ref. */
 	cp->peer = NULL;
@@ -243,12 +237,13 @@ error_0:
  */
 static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
 {
+	struct rxrpc_net *rxnet = conn->params.local->rxnet;
 	int id_cursor, id, distance, limit;
 
 	if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
 		goto dont_reuse;
 
-	if (conn->proto.epoch != rxrpc_epoch)
+	if (conn->proto.epoch != rxnet->epoch)
 		goto mark_dont_reuse;
 
 	/* The IDR tree gets very expensive on memory if the connection IDs are
@@ -310,7 +305,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
 #define cmp(X) ((long)conn->params.X - (long)cp->X)
 			diff = (cmp(peer) ?:
 				cmp(key) ?:
-				cmp(security_level));
+				cmp(security_level) ?:
+				cmp(upgrade));
 #undef cmp
 			if (diff < 0) {
 				p = p->rb_left;
@@ -354,6 +350,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
 	if (cp->exclusive) {
 		call->conn = candidate;
 		call->security_ix = candidate->security_ix;
+		call->service_id = candidate->service_id;
 		_leave(" = 0 [exclusive %d]", candidate->debug_id);
 		return 0;
 	}
@@ -374,7 +371,8 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call,
 #define cmp(X) ((long)conn->params.X - (long)candidate->params.X)
 		diff = (cmp(peer) ?:
 			cmp(key) ?:
-			cmp(security_level));
+			cmp(security_level) ?:
+			cmp(upgrade));
 #undef cmp
 		if (diff < 0) {
 			pp = &(*pp)->rb_left;
@@ -403,6 +401,7 @@ candidate_published:
 	set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
 	call->conn = candidate;
 	call->security_ix = candidate->security_ix;
+	call->service_id = candidate->service_id;
 	spin_unlock(&local->client_conns_lock);
 	_leave(" = 0 [new %d]", candidate->debug_id);
 	return 0;
@@ -424,6 +423,7 @@ found_extant_conn:
 	spin_lock(&conn->channel_lock);
 	call->conn = conn;
 	call->security_ix = conn->security_ix;
+	call->service_id = conn->service_id;
 	list_add(&call->chan_wait_link, &conn->waiting_calls);
 	spin_unlock(&conn->channel_lock);
 	_leave(" = 0 [extant %d]", conn->debug_id);
@@ -440,12 +440,18 @@ error:
 /*
  * Activate a connection.
  */
-static void rxrpc_activate_conn(struct rxrpc_connection *conn)
+static void rxrpc_activate_conn(struct rxrpc_net *rxnet,
+				struct rxrpc_connection *conn)
 {
-	trace_rxrpc_client(conn, -1, rxrpc_client_to_active);
-	conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
-	rxrpc_nr_active_client_conns++;
-	list_move_tail(&conn->cache_link, &rxrpc_active_client_conns);
+	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) {
+		trace_rxrpc_client(conn, -1, rxrpc_client_to_upgrade);
+		conn->cache_state = RXRPC_CONN_CLIENT_UPGRADE;
+	} else {
+		trace_rxrpc_client(conn, -1, rxrpc_client_to_active);
+		conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+	}
+	rxnet->nr_active_client_conns++;
+	list_move_tail(&conn->cache_link, &rxnet->active_client_conns);
 }
 
 /*
@@ -460,25 +466,28 @@ static void rxrpc_activate_conn(struct rxrpc_connection *conn)
  * channels if it has been culled to make space and then re-requested by a new
  * call.
  */
-static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
+static void rxrpc_animate_client_conn(struct rxrpc_net *rxnet,
+				      struct rxrpc_connection *conn)
 {
 	unsigned int nr_conns;
 
 	_enter("%d,%d", conn->debug_id, conn->cache_state);
 
-	if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE)
+	if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE ||
+	    conn->cache_state == RXRPC_CONN_CLIENT_UPGRADE)
 		goto out;
 
-	spin_lock(&rxrpc_client_conn_cache_lock);
+	spin_lock(&rxnet->client_conn_cache_lock);
 
-	nr_conns = rxrpc_nr_client_conns;
+	nr_conns = rxnet->nr_client_conns;
 	if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
 		trace_rxrpc_client(conn, -1, rxrpc_client_count);
-		rxrpc_nr_client_conns = nr_conns + 1;
+		rxnet->nr_client_conns = nr_conns + 1;
 	}
 
 	switch (conn->cache_state) {
 	case RXRPC_CONN_CLIENT_ACTIVE:
+	case RXRPC_CONN_CLIENT_UPGRADE:
 	case RXRPC_CONN_CLIENT_WAITING:
 		break;
 
@@ -494,21 +503,21 @@ static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
 	}
 
 out_unlock:
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	spin_unlock(&rxnet->client_conn_cache_lock);
 out:
 	_leave(" [%d]", conn->cache_state);
 	return;
 
 activate_conn:
 	_debug("activate");
-	rxrpc_activate_conn(conn);
+	rxrpc_activate_conn(rxnet, conn);
 	goto out_unlock;
 
 wait_for_capacity:
 	_debug("wait");
 	trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
 	conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
-	list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns);
+	list_move_tail(&conn->cache_link, &rxnet->waiting_client_conns);
 	goto out_unlock;
 }
 
@@ -582,6 +591,9 @@ static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn)
 	case RXRPC_CONN_CLIENT_ACTIVE:
 		mask = RXRPC_ACTIVE_CHANS_MASK;
 		break;
+	case RXRPC_CONN_CLIENT_UPGRADE:
+		mask = 0x01;
+		break;
 	default:
 		return;
 	}
@@ -660,18 +672,19 @@ int rxrpc_connect_call(struct rxrpc_call *call,
 		       struct sockaddr_rxrpc *srx,
 		       gfp_t gfp)
 {
+	struct rxrpc_net *rxnet = cp->local->rxnet;
 	int ret;
 
 	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
 
-	rxrpc_discard_expired_client_conns(NULL);
-	rxrpc_cull_active_client_conns();
+	rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work);
+	rxrpc_cull_active_client_conns(rxnet);
 
 	ret = rxrpc_get_client_conn(call, cp, srx, gfp);
 	if (ret < 0)
 		return ret;
 
-	rxrpc_animate_client_conn(call->conn);
+	rxrpc_animate_client_conn(rxnet, call->conn);
 	rxrpc_activate_channels(call->conn);
 
 	ret = rxrpc_wait_for_channel(call, gfp);
@@ -729,6 +742,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	unsigned int channel = call->cid & RXRPC_CHANNELMASK;
 	struct rxrpc_connection *conn = call->conn;
 	struct rxrpc_channel *chan = &conn->channels[channel];
+	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&call->socket->sk));
 
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
 	call->conn = NULL;
@@ -750,7 +764,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 		/* We must deactivate or idle the connection if it's now
 		 * waiting for nothing.
 		 */
-		spin_lock(&rxrpc_client_conn_cache_lock);
+		spin_lock(&rxnet->client_conn_cache_lock);
 		if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING &&
 		    list_empty(&conn->waiting_calls) &&
 		    !conn->active_chans)
@@ -787,14 +801,23 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	 * list.  It might even get moved back to the active list whilst we're
 	 * waiting for the lock.
 	 */
-	spin_lock(&rxrpc_client_conn_cache_lock);
+	spin_lock(&rxnet->client_conn_cache_lock);
 
 	switch (conn->cache_state) {
+	case RXRPC_CONN_CLIENT_UPGRADE:
+		/* Deal with termination of a service upgrade probe. */
+		if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) {
+			clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
+			trace_rxrpc_client(conn, channel, rxrpc_client_to_active);
+			conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+			rxrpc_activate_channels_locked(conn);
+		}
+		/* fall through */
 	case RXRPC_CONN_CLIENT_ACTIVE:
 		if (list_empty(&conn->waiting_calls)) {
 			rxrpc_deactivate_one_channel(conn, channel);
 			if (!conn->active_chans) {
-				rxrpc_nr_active_client_conns--;
+				rxnet->nr_active_client_conns--;
 				goto idle_connection;
 			}
 			goto out;
@@ -820,7 +843,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	}
 
 out:
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	spin_unlock(&rxnet->client_conn_cache_lock);
 out_2:
 	spin_unlock(&conn->channel_lock);
 	rxrpc_put_connection(conn);
@@ -835,11 +858,11 @@ idle_connection:
 		trace_rxrpc_client(conn, channel, rxrpc_client_to_idle);
 		conn->idle_timestamp = jiffies;
 		conn->cache_state = RXRPC_CONN_CLIENT_IDLE;
-		list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns);
-		if (rxrpc_idle_client_conns.next == &conn->cache_link &&
-		    !rxrpc_kill_all_client_conns)
+		list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
+		if (rxnet->idle_client_conns.next == &conn->cache_link &&
+		    !rxnet->kill_all_client_conns)
 			queue_delayed_work(rxrpc_workqueue,
-					   &rxrpc_client_conn_reap,
+					   &rxnet->client_conn_reaper,
 					   rxrpc_conn_idle_client_expiry);
 	} else {
 		trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive);
@@ -857,6 +880,7 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
 {
 	struct rxrpc_connection *next = NULL;
 	struct rxrpc_local *local = conn->params.local;
+	struct rxrpc_net *rxnet = local->rxnet;
 	unsigned int nr_conns;
 
 	trace_rxrpc_client(conn, -1, rxrpc_client_cleanup);
@@ -875,18 +899,18 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
 
 	if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
 		trace_rxrpc_client(conn, -1, rxrpc_client_uncount);
-		spin_lock(&rxrpc_client_conn_cache_lock);
-		nr_conns = --rxrpc_nr_client_conns;
+		spin_lock(&rxnet->client_conn_cache_lock);
+		nr_conns = --rxnet->nr_client_conns;
 
 		if (nr_conns < rxrpc_max_client_connections &&
-		    !list_empty(&rxrpc_waiting_client_conns)) {
-			next = list_entry(rxrpc_waiting_client_conns.next,
+		    !list_empty(&rxnet->waiting_client_conns)) {
+			next = list_entry(rxnet->waiting_client_conns.next,
 					  struct rxrpc_connection, cache_link);
 			rxrpc_get_connection(next);
-			rxrpc_activate_conn(next);
+			rxrpc_activate_conn(rxnet, next);
 		}
 
-		spin_unlock(&rxrpc_client_conn_cache_lock);
+		spin_unlock(&rxnet->client_conn_cache_lock);
 	}
 
 	rxrpc_kill_connection(conn);
@@ -921,10 +945,10 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn)
 /*
  * Kill the longest-active client connections to make room for new ones.
  */
-static void rxrpc_cull_active_client_conns(void)
+static void rxrpc_cull_active_client_conns(struct rxrpc_net *rxnet)
 {
 	struct rxrpc_connection *conn;
-	unsigned int nr_conns = rxrpc_nr_client_conns;
+	unsigned int nr_conns = rxnet->nr_client_conns;
 	unsigned int nr_active, limit;
 
 	_enter("");
@@ -936,14 +960,15 @@ static void rxrpc_cull_active_client_conns(void)
 	}
 	limit = rxrpc_reap_client_connections;
 
-	spin_lock(&rxrpc_client_conn_cache_lock);
-	nr_active = rxrpc_nr_active_client_conns;
+	spin_lock(&rxnet->client_conn_cache_lock);
+	nr_active = rxnet->nr_active_client_conns;
 
 	while (nr_active > limit) {
-		ASSERT(!list_empty(&rxrpc_active_client_conns));
-		conn = list_entry(rxrpc_active_client_conns.next,
+		ASSERT(!list_empty(&rxnet->active_client_conns));
+		conn = list_entry(rxnet->active_client_conns.next,
 				  struct rxrpc_connection, cache_link);
-		ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE);
+		ASSERTIFCMP(conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE,
+			    conn->cache_state, ==, RXRPC_CONN_CLIENT_UPGRADE);
 
 		if (list_empty(&conn->waiting_calls)) {
 			trace_rxrpc_client(conn, -1, rxrpc_client_to_culled);
@@ -953,14 +978,14 @@ static void rxrpc_cull_active_client_conns(void)
 			trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
 			conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
 			list_move_tail(&conn->cache_link,
-				       &rxrpc_waiting_client_conns);
+				       &rxnet->waiting_client_conns);
 		}
 
 		nr_active--;
 	}
 
-	rxrpc_nr_active_client_conns = nr_active;
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	rxnet->nr_active_client_conns = nr_active;
+	spin_unlock(&rxnet->client_conn_cache_lock);
 	ASSERTCMP(nr_active, >=, 0);
 	_leave(" [culled]");
 }
@@ -972,22 +997,25 @@ static void rxrpc_cull_active_client_conns(void)
  * This may be called from conn setup or from a work item so cannot be
  * considered non-reentrant.
  */
-static void rxrpc_discard_expired_client_conns(struct work_struct *work)
+void rxrpc_discard_expired_client_conns(struct work_struct *work)
 {
 	struct rxrpc_connection *conn;
+	struct rxrpc_net *rxnet =
+		container_of(to_delayed_work(work),
+			     struct rxrpc_net, client_conn_reaper);
 	unsigned long expiry, conn_expires_at, now;
 	unsigned int nr_conns;
 	bool did_discard = false;
 
-	_enter("%c", work ? 'w' : 'n');
+	_enter("");
 
-	if (list_empty(&rxrpc_idle_client_conns)) {
+	if (list_empty(&rxnet->idle_client_conns)) {
 		_leave(" [empty]");
 		return;
 	}
 
 	/* Don't double up on the discarding */
-	if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) {
+	if (!spin_trylock(&rxnet->client_conn_discard_lock)) {
 		_leave(" [already]");
 		return;
 	}
@@ -995,19 +1023,19 @@ static void rxrpc_discard_expired_client_conns(struct work_struct *work)
 	/* We keep an estimate of what the number of conns ought to be after
 	 * we've discarded some so that we don't overdo the discarding.
 	 */
-	nr_conns = rxrpc_nr_client_conns;
+	nr_conns = rxnet->nr_client_conns;
 
 next:
-	spin_lock(&rxrpc_client_conn_cache_lock);
+	spin_lock(&rxnet->client_conn_cache_lock);
 
-	if (list_empty(&rxrpc_idle_client_conns))
+	if (list_empty(&rxnet->idle_client_conns))
 		goto out;
 
-	conn = list_entry(rxrpc_idle_client_conns.next,
+	conn = list_entry(rxnet->idle_client_conns.next,
 			  struct rxrpc_connection, cache_link);
 	ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags));
 
-	if (!rxrpc_kill_all_client_conns) {
+	if (!rxnet->kill_all_client_conns) {
 		/* If the number of connections is over the reap limit, we
 		 * expedite discard by reducing the expiry timeout.  We must,
 		 * however, have at least a short grace period to be able to do
@@ -1030,7 +1058,7 @@ next:
 	conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
 	list_del_init(&conn->cache_link);
 
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	spin_unlock(&rxnet->client_conn_cache_lock);
 
 	/* When we cleared the EXPOSED flag, we took on responsibility for the
 	 * reference that that had on the usage count.  We deal with that here.
@@ -1050,14 +1078,14 @@ not_yet_expired:
 	 * then things get messier.
 	 */
 	_debug("not yet");
-	if (!rxrpc_kill_all_client_conns)
+	if (!rxnet->kill_all_client_conns)
 		queue_delayed_work(rxrpc_workqueue,
-				   &rxrpc_client_conn_reap,
+				   &rxnet->client_conn_reaper,
 				   conn_expires_at - now);
 
 out:
-	spin_unlock(&rxrpc_client_conn_cache_lock);
-	spin_unlock(&rxrpc_client_conn_discard_mutex);
+	spin_unlock(&rxnet->client_conn_cache_lock);
+	spin_unlock(&rxnet->client_conn_discard_lock);
 	_leave("");
 }
 
@@ -1065,17 +1093,17 @@ out:
  * Preemptively destroy all the client connection records rather than waiting
  * for them to time out
  */
-void __exit rxrpc_destroy_all_client_connections(void)
+void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
 {
 	_enter("");
 
-	spin_lock(&rxrpc_client_conn_cache_lock);
-	rxrpc_kill_all_client_conns = true;
-	spin_unlock(&rxrpc_client_conn_cache_lock);
+	spin_lock(&rxnet->client_conn_cache_lock);
+	rxnet->kill_all_client_conns = true;
+	spin_unlock(&rxnet->client_conn_cache_lock);
 
-	cancel_delayed_work(&rxrpc_client_conn_reap);
+	cancel_delayed_work(&rxnet->client_conn_reaper);
 
-	if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0))
+	if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0))
 		_debug("destroy: queue failed");
 
 	_leave("");
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 46babcf82ce8..59a51a56e7c8 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -74,7 +74,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	pkt.whdr.userStatus	= 0;
 	pkt.whdr.securityIndex	= conn->security_ix;
 	pkt.whdr._rsvd		= 0;
-	pkt.whdr.serviceId	= htons(chan->last_service_id);
+	pkt.whdr.serviceId	= htons(conn->service_id);
 
 	len = sizeof(pkt.whdr);
 	switch (chan->last_type) {
@@ -208,7 +208,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
 	whdr.userStatus	= 0;
 	whdr.securityIndex = conn->security_ix;
 	whdr._rsvd	= 0;
-	whdr.serviceId	= htons(conn->params.service_id);
+	whdr.serviceId	= htons(conn->service_id);
 
 	word		= htonl(conn->local_abort);
 
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index b0ecb770fdce..5bb255107427 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -22,13 +22,6 @@
  */
 unsigned int rxrpc_connection_expiry = 10 * 60;
 
-static void rxrpc_connection_reaper(struct work_struct *work);
-
-LIST_HEAD(rxrpc_connections);
-LIST_HEAD(rxrpc_connection_proc_list);
-DEFINE_RWLOCK(rxrpc_connection_lock);
-static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
-
 static void rxrpc_destroy_connection(struct rcu_head *);
 
 /*
@@ -174,7 +167,6 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
 		 * through the channel, whilst disposing of the actual call record.
 		 */
 		trace_rxrpc_disconnect_call(call);
-		chan->last_service_id = call->service_id;
 		if (call->abort_code) {
 			chan->last_abort = call->abort_code;
 			chan->last_type = RXRPC_PACKET_TYPE_ABORT;
@@ -222,15 +214,17 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
  */
 void rxrpc_kill_connection(struct rxrpc_connection *conn)
 {
+	struct rxrpc_net *rxnet = conn->params.local->rxnet;
+
 	ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
 	       !rcu_access_pointer(conn->channels[1].call) &&
 	       !rcu_access_pointer(conn->channels[2].call) &&
 	       !rcu_access_pointer(conn->channels[3].call));
 	ASSERT(list_empty(&conn->cache_link));
 
-	write_lock(&rxrpc_connection_lock);
+	write_lock(&rxnet->conn_lock);
 	list_del_init(&conn->proc_link);
-	write_unlock(&rxrpc_connection_lock);
+	write_unlock(&rxnet->conn_lock);
 
 	/* Drain the Rx queue.  Note that even though we've unpublished, an
 	 * incoming packet could still be being added to our Rx queue, so we
@@ -309,14 +303,17 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
  */
 void rxrpc_put_service_conn(struct rxrpc_connection *conn)
 {
+	struct rxrpc_net *rxnet;
 	const void *here = __builtin_return_address(0);
 	int n;
 
 	n = atomic_dec_return(&conn->usage);
 	trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
 	ASSERTCMP(n, >=, 0);
-	if (n == 0)
-		rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+	if (n == 0) {
+		rxnet = conn->params.local->rxnet;
+		rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0);
+	}
 }
 
 /*
@@ -348,9 +345,12 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
 /*
  * reap dead service connections
  */
-static void rxrpc_connection_reaper(struct work_struct *work)
+void rxrpc_service_connection_reaper(struct work_struct *work)
 {
 	struct rxrpc_connection *conn, *_p;
+	struct rxrpc_net *rxnet =
+		container_of(to_delayed_work(work),
+			     struct rxrpc_net, service_conn_reaper);
 	unsigned long reap_older_than, earliest, idle_timestamp, now;
 
 	LIST_HEAD(graveyard);
@@ -361,8 +361,8 @@ static void rxrpc_connection_reaper(struct work_struct *work)
 	reap_older_than = now - rxrpc_connection_expiry * HZ;
 	earliest = ULONG_MAX;
 
-	write_lock(&rxrpc_connection_lock);
-	list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+	write_lock(&rxnet->conn_lock);
+	list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
 		ASSERTCMP(atomic_read(&conn->usage), >, 0);
 		if (likely(atomic_read(&conn->usage) > 1))
 			continue;
@@ -393,12 +393,12 @@ static void rxrpc_connection_reaper(struct work_struct *work)
 
 		list_move_tail(&conn->link, &graveyard);
 	}
-	write_unlock(&rxrpc_connection_lock);
+	write_unlock(&rxnet->conn_lock);
 
 	if (earliest != ULONG_MAX) {
 		_debug("reschedule reaper %ld", (long) earliest - now);
 		ASSERT(time_after(earliest, now));
-		rxrpc_queue_delayed_work(&rxrpc_connection_reap,
+		rxrpc_queue_delayed_work(&rxnet->client_conn_reaper,
 					 earliest - now);
 	}
 
@@ -418,36 +418,30 @@ static void rxrpc_connection_reaper(struct work_struct *work)
  * preemptively destroy all the service connection records rather than
  * waiting for them to time out
  */
-void __exit rxrpc_destroy_all_connections(void)
+void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
 {
 	struct rxrpc_connection *conn, *_p;
 	bool leak = false;
 
 	_enter("");
 
-	rxrpc_destroy_all_client_connections();
+	rxrpc_destroy_all_client_connections(rxnet);
 
 	rxrpc_connection_expiry = 0;
-	cancel_delayed_work(&rxrpc_connection_reap);
-	rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+	cancel_delayed_work(&rxnet->client_conn_reaper);
+	rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0);
 	flush_workqueue(rxrpc_workqueue);
 
-	write_lock(&rxrpc_connection_lock);
-	list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+	write_lock(&rxnet->conn_lock);
+	list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
 		pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
 		       conn, atomic_read(&conn->usage));
 		leak = true;
 	}
-	write_unlock(&rxrpc_connection_lock);
+	write_unlock(&rxnet->conn_lock);
 	BUG_ON(leak);
 
-	ASSERT(list_empty(&rxrpc_connection_proc_list));
-
-	/* Make sure the local and peer records pinned by any dying connections
-	 * are released.
-	 */
-	rcu_barrier();
-	rxrpc_destroy_client_conn_ids();
+	ASSERT(list_empty(&rxnet->conn_proc_list));
 
 	_leave("");
 }
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index eef551f40dc2..e60fcd2a4a02 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -121,7 +121,8 @@ replace_old_connection:
  * Preallocate a service connection.  The connection is placed on the proc and
  * reap lists so that we don't have to get the lock from BH context.
  */
-struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp)
+struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet,
+							   gfp_t gfp)
 {
 	struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp);
 
@@ -132,10 +133,10 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp)
 		conn->state = RXRPC_CONN_SERVICE_PREALLOC;
 		atomic_set(&conn->usage, 2);
 
-		write_lock(&rxrpc_connection_lock);
-		list_add_tail(&conn->link, &rxrpc_connections);
-		list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
-		write_unlock(&rxrpc_connection_lock);
+		write_lock(&rxnet->conn_lock);
+		list_add_tail(&conn->link, &rxnet->service_conns);
+		list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
+		write_unlock(&rxnet->conn_lock);
 
 		trace_rxrpc_conn(conn, rxrpc_conn_new_service,
 				 atomic_read(&conn->usage),
@@ -149,7 +150,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp)
  * Set up an incoming connection.  This is called in BH context with the RCU
  * read lock held.
  */
-void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
+void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
+				   struct rxrpc_connection *conn,
 				   struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -159,6 +161,7 @@ void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
 	conn->proto.epoch	= sp->hdr.epoch;
 	conn->proto.cid		= sp->hdr.cid & RXRPC_CIDMASK;
 	conn->params.service_id	= sp->hdr.serviceId;
+	conn->service_id	= sp->hdr.serviceId;
 	conn->security_ix	= sp->hdr.securityIndex;
 	conn->out_clientflag	= 0;
 	if (conn->security_ix)
@@ -166,6 +169,14 @@ void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
 	else
 		conn->state	= RXRPC_CONN_SERVICE;
 
+	/* See if we should upgrade the service.  This can only happen on the
+	 * first packet on a new connection.  Once done, it applies to all
+	 * subsequent calls on that connection.
+	 */
+	if (sp->hdr.userStatus == RXRPC_USERSTATUS_SERVICE_UPGRADE &&
+	    conn->service_id == rx->service_upgrade.from)
+		conn->service_id = rx->service_upgrade.to;
+
 	/* Make the connection a target for incoming packets. */
 	rxrpc_publish_service_conn(conn->params.peer, conn);
 
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 45dba732a3b4..e56e23ed2229 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1142,6 +1142,13 @@ void rxrpc_data_ready(struct sock *udp_sk)
 		if (sp->hdr.securityIndex != conn->security_ix)
 			goto wrong_security;
 
+		if (sp->hdr.serviceId != conn->service_id) {
+			if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) ||
+			    conn->service_id != conn->params.service_id)
+				goto reupgrade;
+			conn->service_id = sp->hdr.serviceId;
+		}
+		
 		if (sp->hdr.callNumber == 0) {
 			/* Connection-level packet */
 			_debug("CONN %p {%d}", conn, conn->debug_id);
@@ -1194,6 +1201,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
 				rxrpc_input_implicit_end_call(conn, call);
 			call = NULL;
 		}
+
+		if (call && sp->hdr.serviceId != call->service_id)
+			call->service_id = sp->hdr.serviceId;
 	} else {
 		skew = 0;
 		call = NULL;
@@ -1237,11 +1247,18 @@ wrong_security:
 	skb->priority = RXKADINCONSISTENCY;
 	goto post_abort;
 
+reupgrade:
+	rcu_read_unlock();
+	trace_rxrpc_abort("UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+			  RX_PROTOCOL_ERROR, EBADMSG);
+	goto protocol_error;
+
 bad_message_unlock:
 	rcu_read_unlock();
 bad_message:
 	trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RX_PROTOCOL_ERROR, EBADMSG);
+protocol_error:
 	skb->priority = RX_PROTOCOL_ERROR;
 post_abort:
 	skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index ff4864d550b8..38b99db30e54 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -25,9 +25,6 @@
 static void rxrpc_local_processor(struct work_struct *);
 static void rxrpc_local_rcu(struct rcu_head *);
 
-static DEFINE_MUTEX(rxrpc_local_mutex);
-static LIST_HEAD(rxrpc_local_endpoints);
-
 /*
  * Compare a local to an address.  Return -ve, 0 or +ve to indicate less than,
  * same or greater than.
@@ -77,13 +74,15 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
 /*
  * Allocate a new local endpoint.
  */
-static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
+static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
+					     const struct sockaddr_rxrpc *srx)
 {
 	struct rxrpc_local *local;
 
 	local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
 	if (local) {
 		atomic_set(&local->usage, 1);
+		local->rxnet = rxnet;
 		INIT_LIST_HEAD(&local->link);
 		INIT_WORK(&local->processor, rxrpc_local_processor);
 		init_rwsem(&local->defrag_sem);
@@ -95,6 +94,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
 		rwlock_init(&local->services_lock);
 		local->debug_id = atomic_inc_return(&rxrpc_debug_id);
 		memcpy(&local->srx, srx, sizeof(*srx));
+		local->srx.srx_service = 0;
 	}
 
 	_leave(" = %p", local);
@@ -105,7 +105,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
  * create the local socket
  * - must be called with rxrpc_local_mutex locked
  */
-static int rxrpc_open_socket(struct rxrpc_local *local)
+static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 {
 	struct sock *sock;
 	int ret, opt;
@@ -114,7 +114,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local)
 	       local, local->srx.transport_type, local->srx.transport.family);
 
 	/* create a socket to represent the local endpoint */
-	ret = sock_create_kern(&init_net, local->srx.transport.family,
+	ret = sock_create_kern(net, local->srx.transport.family,
 			       local->srx.transport_type, 0, &local->socket);
 	if (ret < 0) {
 		_leave(" = %d [socket]", ret);
@@ -172,9 +172,11 @@ error:
 /*
  * Look up or create a new local endpoint using the specified local address.
  */
-struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
+struct rxrpc_local *rxrpc_lookup_local(struct net *net,
+				       const struct sockaddr_rxrpc *srx)
 {
 	struct rxrpc_local *local;
+	struct rxrpc_net *rxnet = rxrpc_net(net);
 	struct list_head *cursor;
 	const char *age;
 	long diff;
@@ -183,10 +185,10 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
 	_enter("{%d,%d,%pISp}",
 	       srx->transport_type, srx->transport.family, &srx->transport);
 
-	mutex_lock(&rxrpc_local_mutex);
+	mutex_lock(&rxnet->local_mutex);
 
-	for (cursor = rxrpc_local_endpoints.next;
-	     cursor != &rxrpc_local_endpoints;
+	for (cursor = rxnet->local_endpoints.next;
+	     cursor != &rxnet->local_endpoints;
 	     cursor = cursor->next) {
 		local = list_entry(cursor, struct rxrpc_local, link);
 
@@ -220,11 +222,11 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
 		goto found;
 	}
 
-	local = rxrpc_alloc_local(srx);
+	local = rxrpc_alloc_local(rxnet, srx);
 	if (!local)
 		goto nomem;
 
-	ret = rxrpc_open_socket(local);
+	ret = rxrpc_open_socket(local, net);
 	if (ret < 0)
 		goto sock_error;
 
@@ -232,7 +234,7 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
 	age = "new";
 
 found:
-	mutex_unlock(&rxrpc_local_mutex);
+	mutex_unlock(&rxnet->local_mutex);
 
 	_net("LOCAL %s %d {%pISp}",
 	     age, local->debug_id, &local->srx.transport);
@@ -243,13 +245,13 @@ found:
 nomem:
 	ret = -ENOMEM;
 sock_error:
-	mutex_unlock(&rxrpc_local_mutex);
+	mutex_unlock(&rxnet->local_mutex);
 	kfree(local);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 
 addr_in_use:
-	mutex_unlock(&rxrpc_local_mutex);
+	mutex_unlock(&rxnet->local_mutex);
 	_leave(" = -EADDRINUSE");
 	return ERR_PTR(-EADDRINUSE);
 }
@@ -273,6 +275,7 @@ void __rxrpc_put_local(struct rxrpc_local *local)
 static void rxrpc_local_destroyer(struct rxrpc_local *local)
 {
 	struct socket *socket = local->socket;
+	struct rxrpc_net *rxnet = local->rxnet;
 
 	_enter("%d", local->debug_id);
 
@@ -286,9 +289,9 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
 	}
 	local->dead = true;
 
-	mutex_lock(&rxrpc_local_mutex);
+	mutex_lock(&rxnet->local_mutex);
 	list_del_init(&local->link);
-	mutex_unlock(&rxrpc_local_mutex);
+	mutex_unlock(&rxnet->local_mutex);
 
 	ASSERT(RB_EMPTY_ROOT(&local->client_conns));
 	ASSERT(!local->service);
@@ -357,7 +360,7 @@ static void rxrpc_local_rcu(struct rcu_head *rcu)
 /*
  * Verify the local endpoint list is empty by this point.
  */
-void __exit rxrpc_destroy_all_locals(void)
+void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet)
 {
 	struct rxrpc_local *local;
 
@@ -365,15 +368,13 @@ void __exit rxrpc_destroy_all_locals(void)
 
 	flush_workqueue(rxrpc_workqueue);
 
-	if (!list_empty(&rxrpc_local_endpoints)) {
-		mutex_lock(&rxrpc_local_mutex);
-		list_for_each_entry(local, &rxrpc_local_endpoints, link) {
+	if (!list_empty(&rxnet->local_endpoints)) {
+		mutex_lock(&rxnet->local_mutex);
+		list_for_each_entry(local, &rxnet->local_endpoints, link) {
 			pr_err("AF_RXRPC: Leaked local %p {%d}\n",
 			       local, atomic_read(&local->usage));
 		}
-		mutex_unlock(&rxrpc_local_mutex);
+		mutex_unlock(&rxnet->local_mutex);
 		BUG();
 	}
-
-	rcu_barrier();
 }
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
new file mode 100644
index 000000000000..7edceb8522f5
--- /dev/null
+++ b/net/rxrpc/net_ns.c
@@ -0,0 +1,84 @@
+/* rxrpc network namespace handling.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/proc_fs.h>
+#include "ar-internal.h"
+
+unsigned int rxrpc_net_id;
+
+/*
+ * Initialise a per-network namespace record.
+ */
+static __net_init int rxrpc_init_net(struct net *net)
+{
+	struct rxrpc_net *rxnet = rxrpc_net(net);
+	int ret;
+
+	get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
+	rxnet->epoch |= RXRPC_RANDOM_EPOCH;
+
+	INIT_LIST_HEAD(&rxnet->calls);
+	rwlock_init(&rxnet->call_lock);
+
+	INIT_LIST_HEAD(&rxnet->conn_proc_list);
+	INIT_LIST_HEAD(&rxnet->service_conns);
+	rwlock_init(&rxnet->conn_lock);
+	INIT_DELAYED_WORK(&rxnet->service_conn_reaper,
+			  rxrpc_service_connection_reaper);
+
+	rxnet->nr_client_conns = 0;
+	rxnet->nr_active_client_conns = 0;
+	rxnet->kill_all_client_conns = false;
+	spin_lock_init(&rxnet->client_conn_cache_lock);
+	spin_lock_init(&rxnet->client_conn_discard_lock);
+	INIT_LIST_HEAD(&rxnet->waiting_client_conns);
+	INIT_LIST_HEAD(&rxnet->active_client_conns);
+	INIT_LIST_HEAD(&rxnet->idle_client_conns);
+	INIT_DELAYED_WORK(&rxnet->client_conn_reaper,
+			  rxrpc_discard_expired_client_conns);
+
+	INIT_LIST_HEAD(&rxnet->local_endpoints);
+	mutex_init(&rxnet->local_mutex);
+	hash_init(rxnet->peer_hash);
+	spin_lock_init(&rxnet->peer_hash_lock);
+
+	ret = -ENOMEM;
+	rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net);
+	if (!rxnet->proc_net)
+		goto err_proc;
+
+	proc_create("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_fops);
+	proc_create("conns", 0444, rxnet->proc_net, &rxrpc_connection_seq_fops);
+	return 0;
+
+err_proc:
+	return ret;
+}
+
+/*
+ * Clean up a per-network namespace record.
+ */
+static __net_exit void rxrpc_exit_net(struct net *net)
+{
+	struct rxrpc_net *rxnet = rxrpc_net(net);
+
+	rxrpc_destroy_all_calls(rxnet);
+	rxrpc_destroy_all_connections(rxnet);
+	rxrpc_destroy_all_locals(rxnet);
+	proc_remove(rxnet->proc_net);
+}
+
+struct pernet_operations rxrpc_net_ops = {
+	.init	= rxrpc_init_net,
+	.exit	= rxrpc_exit_net,
+	.id	= &rxrpc_net_id,
+	.size	= sizeof(struct rxrpc_net),
+};
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5dab1ff3a6c2..5bd2d0fa4a03 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -292,6 +292,10 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	whdr._rsvd	= htons(sp->hdr._rsvd);
 	whdr.serviceId	= htons(call->service_id);
 
+	if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
+	    sp->hdr.seq == 1)
+		whdr.userStatus	= RXRPC_USERSTATUS_SERVICE_UPGRADE;
+
 	iov[0].iov_base = &whdr;
 	iov[0].iov_len = sizeof(whdr);
 	iov[1].iov_base = skb->head;
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 862eea6b266c..cfed3b27adf0 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -26,9 +26,6 @@
 #include <net/ip6_route.h>
 #include "ar-internal.h"
 
-static DEFINE_HASHTABLE(rxrpc_peer_hash, 10);
-static DEFINE_SPINLOCK(rxrpc_peer_hash_lock);
-
 /*
  * Hash a peer key.
  */
@@ -124,8 +121,9 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu(
 	unsigned long hash_key)
 {
 	struct rxrpc_peer *peer;
+	struct rxrpc_net *rxnet = local->rxnet;
 
-	hash_for_each_possible_rcu(rxrpc_peer_hash, peer, hash_link, hash_key) {
+	hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) {
 		if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0) {
 			if (atomic_read(&peer->usage) == 0)
 				return NULL;
@@ -301,13 +299,14 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
 					      struct rxrpc_peer *prealloc)
 {
 	struct rxrpc_peer *peer;
+	struct rxrpc_net *rxnet = local->rxnet;
 	unsigned long hash_key;
 
 	hash_key = rxrpc_peer_hash_key(local, &prealloc->srx);
 	prealloc->local = local;
 	rxrpc_init_peer(prealloc, hash_key);
 
-	spin_lock(&rxrpc_peer_hash_lock);
+	spin_lock(&rxnet->peer_hash_lock);
 
 	/* Need to check that we aren't racing with someone else */
 	peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key);
@@ -315,10 +314,10 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
 		peer = NULL;
 	if (!peer) {
 		peer = prealloc;
-		hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key);
+		hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
 	}
 
-	spin_unlock(&rxrpc_peer_hash_lock);
+	spin_unlock(&rxnet->peer_hash_lock);
 	return peer;
 }
 
@@ -329,6 +328,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
 				     struct sockaddr_rxrpc *srx, gfp_t gfp)
 {
 	struct rxrpc_peer *peer, *candidate;
+	struct rxrpc_net *rxnet = local->rxnet;
 	unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
 
 	_enter("{%pISp}", &srx->transport);
@@ -350,17 +350,17 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
 			return NULL;
 		}
 
-		spin_lock_bh(&rxrpc_peer_hash_lock);
+		spin_lock_bh(&rxnet->peer_hash_lock);
 
 		/* Need to check that we aren't racing with someone else */
 		peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
 		if (peer && !rxrpc_get_peer_maybe(peer))
 			peer = NULL;
 		if (!peer)
-			hash_add_rcu(rxrpc_peer_hash,
+			hash_add_rcu(rxnet->peer_hash,
 				     &candidate->hash_link, hash_key);
 
-		spin_unlock_bh(&rxrpc_peer_hash_lock);
+		spin_unlock_bh(&rxnet->peer_hash_lock);
 
 		if (peer)
 			kfree(candidate);
@@ -379,11 +379,13 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
  */
 void __rxrpc_put_peer(struct rxrpc_peer *peer)
 {
+	struct rxrpc_net *rxnet = peer->local->rxnet;
+
 	ASSERT(hlist_empty(&peer->error_targets));
 
-	spin_lock_bh(&rxrpc_peer_hash_lock);
+	spin_lock_bh(&rxnet->peer_hash_lock);
 	hash_del_rcu(&peer->hash_link);
-	spin_unlock_bh(&rxrpc_peer_hash_lock);
+	spin_unlock_bh(&rxnet->peer_hash_lock);
 
 	kfree_rcu(peer, rcu);
 }
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index b9bcfbfb095c..7421656963a9 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -30,19 +30,25 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
  */
 static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
 {
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
 	rcu_read_lock();
-	read_lock(&rxrpc_call_lock);
-	return seq_list_start_head(&rxrpc_calls, *_pos);
+	read_lock(&rxnet->call_lock);
+	return seq_list_start_head(&rxnet->calls, *_pos);
 }
 
 static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	return seq_list_next(v, &rxrpc_calls, pos);
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+	return seq_list_next(v, &rxnet->calls, pos);
 }
 
 static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
 {
-	read_unlock(&rxrpc_call_lock);
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+	read_unlock(&rxnet->call_lock);
 	rcu_read_unlock();
 }
 
@@ -52,10 +58,11 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 	struct rxrpc_sock *rx;
 	struct rxrpc_peer *peer;
 	struct rxrpc_call *call;
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
 	rxrpc_seq_t tx_hard_ack, rx_hard_ack;
 	char lbuff[50], rbuff[50];
 
-	if (v == &rxrpc_calls) {
+	if (v == &rxnet->calls) {
 		seq_puts(seq,
 			 "Proto Local                                          "
 			 " Remote                                         "
@@ -113,7 +120,8 @@ static const struct seq_operations rxrpc_call_seq_ops = {
 
 static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &rxrpc_call_seq_ops);
+	return seq_open_net(inode, file, &rxrpc_call_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 const struct file_operations rxrpc_call_seq_fops = {
@@ -129,27 +137,34 @@ const struct file_operations rxrpc_call_seq_fops = {
  */
 static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
 {
-	read_lock(&rxrpc_connection_lock);
-	return seq_list_start_head(&rxrpc_connection_proc_list, *_pos);
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+	read_lock(&rxnet->conn_lock);
+	return seq_list_start_head(&rxnet->conn_proc_list, *_pos);
 }
 
 static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
 				       loff_t *pos)
 {
-	return seq_list_next(v, &rxrpc_connection_proc_list, pos);
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+	return seq_list_next(v, &rxnet->conn_proc_list, pos);
 }
 
 static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
 {
-	read_unlock(&rxrpc_connection_lock);
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+	read_unlock(&rxnet->conn_lock);
 }
 
 static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 {
 	struct rxrpc_connection *conn;
+	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
 	char lbuff[50], rbuff[50];
 
-	if (v == &rxrpc_connection_proc_list) {
+	if (v == &rxnet->conn_proc_list) {
 		seq_puts(seq,
 			 "Proto Local                                          "
 			 " Remote                                         "
@@ -175,7 +190,7 @@ print:
 		   " %s %08x %08x %08x\n",
 		   lbuff,
 		   rbuff,
-		   conn->params.service_id,
+		   conn->service_id,
 		   conn->proto.cid,
 		   rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
 		   atomic_read(&conn->usage),
@@ -197,7 +212,8 @@ static const struct seq_operations rxrpc_connection_seq_ops = {
 
 static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &rxrpc_connection_seq_ops);
+	return seq_open_net(inode, file, &rxrpc_connection_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 const struct file_operations rxrpc_connection_seq_fops = {
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index f9caf3b77509..bdece21f313d 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -522,8 +522,11 @@ try_again:
 	}
 
 	if (msg->msg_name) {
-		size_t len = sizeof(call->conn->params.peer->srx);
-		memcpy(msg->msg_name, &call->conn->params.peer->srx, len);
+		struct sockaddr_rxrpc *srx = msg->msg_name;
+		size_t len = sizeof(call->peer->srx);
+
+		memcpy(msg->msg_name, &call->peer->srx, len);
+		srx->srx_service = call->service_id;
 		msg->msg_namelen = len;
 	}
 
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 1bb9b2ccc267..46d1a1f0b55b 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -227,7 +227,9 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
 	len &= ~(call->conn->size_align - 1);
 
 	sg_init_table(sg, nsg);
-	skb_to_sgvec(skb, sg, 0, len);
+	err = skb_to_sgvec(skb, sg, 0, len);
+	if (unlikely(err < 0))
+		goto out;
 	skcipher_request_set_crypt(req, sg, sg, len, iv.x);
 	crypto_skcipher_encrypt(req);
 
@@ -324,7 +326,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
 	bool aborted;
 	u32 data_size, buf;
 	u16 check;
-	int nsg;
+	int nsg, ret;
 
 	_enter("");
 
@@ -342,7 +344,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
 		goto nomem;
 
 	sg_init_table(sg, nsg);
-	skb_to_sgvec(skb, sg, offset, 8);
+	ret = skb_to_sgvec(skb, sg, offset, 8);
+	if (unlikely(ret < 0))
+		return ret;
 
 	/* start the decryption afresh */
 	memset(&iv, 0, sizeof(iv));
@@ -409,7 +413,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 	bool aborted;
 	u32 data_size, buf;
 	u16 check;
-	int nsg;
+	int nsg, ret;
 
 	_enter(",{%d}", skb->len);
 
@@ -434,7 +438,12 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 	}
 
 	sg_init_table(sg, nsg);
-	skb_to_sgvec(skb, sg, offset, len);
+	ret = skb_to_sgvec(skb, sg, offset, len);
+	if (unlikely(ret < 0)) {
+		if (sg != _sg)
+			kfree(sg);
+		return ret;
+	}
 
 	/* decrypt from the session key */
 	token = call->conn->params.key->payload.data[0];
@@ -640,7 +649,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
 	whdr.userStatus	= 0;
 	whdr.securityIndex = conn->security_ix;
 	whdr._rsvd	= 0;
-	whdr.serviceId	= htons(conn->params.service_id);
+	whdr.serviceId	= htons(conn->service_id);
 
 	iov[0].iov_base	= &whdr;
 	iov[0].iov_len	= sizeof(whdr);
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 7d921e56e715..e9f428351293 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -121,7 +121,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
 
 	_enter("");
 
-	sprintf(kdesc, "%u:%u", conn->params.service_id, conn->security_ix);
+	sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix);
 
 	sec = rxrpc_security_lookup(conn->security_ix);
 	if (!sec) {
@@ -133,7 +133,8 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
 	read_lock(&local->services_lock);
 	rx = rcu_dereference_protected(local->service,
 				       lockdep_is_held(&local->services_lock));
-	if (rx && rx->srx.srx_service == conn->params.service_id)
+	if (rx && (rx->srx.srx_service == conn->service_id ||
+		   rx->second_service == conn->service_id))
 		goto found_service;
 
 	/* the service appears to have died */
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 96ffa5d5733b..5a4801e7f560 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -366,7 +366,8 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
 			      unsigned long *user_call_ID,
 			      enum rxrpc_command *command,
 			      u32 *abort_code,
-			      bool *_exclusive)
+			      bool *_exclusive,
+			      bool *_upgrade)
 {
 	struct cmsghdr *cmsg;
 	bool got_user_ID = false;
@@ -429,6 +430,13 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
 			if (len != 0)
 				return -EINVAL;
 			break;
+
+		case RXRPC_UPGRADE_SERVICE:
+			*_upgrade = true;
+			if (len != 0)
+				return -EINVAL;
+			break;
+
 		default:
 			return -EINVAL;
 		}
@@ -447,7 +455,8 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
  */
 static struct rxrpc_call *
 rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
-				  unsigned long user_call_ID, bool exclusive)
+				  unsigned long user_call_ID, bool exclusive,
+				  bool upgrade)
 	__releases(&rx->sk.sk_lock.slock)
 {
 	struct rxrpc_conn_parameters cp;
@@ -472,6 +481,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 	cp.key			= rx->key;
 	cp.security_level	= rx->min_sec_level;
 	cp.exclusive		= rx->exclusive | exclusive;
+	cp.upgrade		= upgrade;
 	cp.service_id		= srx->srx_service;
 	call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
 	/* The socket is now unlocked */
@@ -493,13 +503,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 	struct rxrpc_call *call;
 	unsigned long user_call_ID = 0;
 	bool exclusive = false;
+	bool upgrade = true;
 	u32 abort_code = 0;
 	int ret;
 
 	_enter("");
 
 	ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
-				 &exclusive);
+				 &exclusive, &upgrade);
 	if (ret < 0)
 		goto error_release_sock;
 
@@ -521,7 +532,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 		if (cmd != RXRPC_CMD_SEND_DATA)
 			goto error_release_sock;
 		call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
-							 exclusive);
+							 exclusive, upgrade);
 		/* The socket is now unlocked... */
 		if (IS_ERR(call))
 			return PTR_ERR(call);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9fb84f0de6af..e70ed26485a2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -649,6 +649,7 @@ config NET_EMATCH_IPSET
 
 config NET_CLS_ACT
 	bool "Actions"
+	select NET_CLS
 	---help---
 	  Say Y here if you want to use traffic control actions. Actions
 	  get attached to classifiers and are invoked after a successful
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a90e8f355c00..aed6cf2e9fd8 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -28,6 +28,31 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
+static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
+{
+	u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
+
+	if (!tp)
+		return -EINVAL;
+	a->goto_chain = tcf_chain_get(tp->chain->block, chain_index, true);
+	if (!a->goto_chain)
+		return -ENOMEM;
+	return 0;
+}
+
+static void tcf_action_goto_chain_fini(struct tc_action *a)
+{
+	tcf_chain_put(a->goto_chain);
+}
+
+static void tcf_action_goto_chain_exec(const struct tc_action *a,
+				       struct tcf_result *res)
+{
+	const struct tcf_chain *chain = a->goto_chain;
+
+	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
+}
+
 static void free_tcf(struct rcu_head *head)
 {
 	struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu);
@@ -39,6 +64,8 @@ static void free_tcf(struct rcu_head *head)
 		kfree(p->act_cookie->data);
 		kfree(p->act_cookie);
 	}
+	if (p->goto_chain)
+		tcf_action_goto_chain_fini(p);
 
 	kfree(p);
 }
@@ -465,6 +492,8 @@ repeat:
 				else /* faulty graph, stop pipeline */
 					return TC_ACT_OK;
 			}
+		} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
+			tcf_action_goto_chain_exec(a, res);
 		}
 
 		if (ret != TC_ACT_PIPE)
@@ -570,9 +599,9 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 	return c;
 }
 
-struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
-				    struct nlattr *est, char *name, int ovr,
-				    int bind)
+struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
+				    struct nlattr *nla, struct nlattr *est,
+				    char *name, int ovr, int bind)
 {
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
@@ -657,6 +686,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
 	if (err != ACT_P_CREATED)
 		module_put(a_o->owner);
 
+	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
+		err = tcf_action_goto_chain_init(a, tp);
+		if (err) {
+			LIST_HEAD(actions);
+
+			list_add_tail(&a->list, &actions);
+			tcf_action_destroy(&actions, bind);
+			return ERR_PTR(err);
+		}
+	}
+
 	return a;
 
 err_mod:
@@ -680,8 +720,9 @@ static void cleanup_a(struct list_head *actions, int ovr)
 		a->tcfa_refcnt--;
 }
 
-int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
-		    char *name, int ovr, int bind, struct list_head *actions)
+int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
+		    struct nlattr *est, char *name, int ovr, int bind,
+		    struct list_head *actions)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
@@ -693,7 +734,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 		return err;
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_init_1(net, tb[i], est, name, ovr, bind);
+		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
@@ -1020,7 +1061,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	int ret = 0;
 	LIST_HEAD(actions);
 
-	ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
+	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
 	if (ret)
 		return ret;
 
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index ab6fdbd34db7..3317a2f579da 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -350,6 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
 	sctph->checksum = sctp_compute_cksum(skb,
 					     skb_network_offset(skb) + ihl);
 	skb->ip_summed = CHECKSUM_NONE;
+	skb->csum_not_inet = 0;
 
 	return 1;
 }
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 22f88b35a546..39da0c5801c9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -106,13 +106,12 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 
 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
 				 struct nlmsghdr *n,
-				 struct tcf_proto __rcu **chain, int event)
+				 struct tcf_chain *chain, int event)
 {
-	struct tcf_proto __rcu **it_chain;
 	struct tcf_proto *tp;
 
-	for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL;
-	     it_chain = &tp->next)
+	for (tp = rtnl_dereference(chain->filter_chain);
+	     tp; tp = rtnl_dereference(tp->next))
 		tfilter_notify(net, oskb, n, tp, 0, event, false);
 }
 
@@ -125,11 +124,12 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 	if (tp)
 		first = tp->prio - 1;
 
-	return first;
+	return TC_H_MAJ(first);
 }
 
 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
-					  u32 prio, u32 parent, struct Qdisc *q)
+					  u32 prio, u32 parent, struct Qdisc *q,
+					  struct tcf_chain *chain)
 {
 	struct tcf_proto *tp;
 	int err;
@@ -165,6 +165,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 	tp->prio = prio;
 	tp->classid = parent;
 	tp->q = q;
+	tp->chain = chain;
 
 	err = tp->ops->init(tp);
 	if (err) {
@@ -185,16 +186,226 @@ static void tcf_proto_destroy(struct tcf_proto *tp)
 	kfree_rcu(tp, rcu);
 }
 
-void tcf_destroy_chain(struct tcf_proto __rcu **fl)
+static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
+					  u32 chain_index)
+{
+	struct tcf_chain *chain;
+
+	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+	if (!chain)
+		return NULL;
+	list_add_tail(&chain->list, &block->chain_list);
+	chain->block = block;
+	chain->index = chain_index;
+	chain->refcnt = 1;
+	return chain;
+}
+
+static void tcf_chain_flush(struct tcf_chain *chain)
 {
 	struct tcf_proto *tp;
 
-	while ((tp = rtnl_dereference(*fl)) != NULL) {
-		RCU_INIT_POINTER(*fl, tp->next);
+	if (*chain->p_filter_chain)
+		RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
+	while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
+		RCU_INIT_POINTER(chain->filter_chain, tp->next);
 		tcf_proto_destroy(tp);
 	}
 }
-EXPORT_SYMBOL(tcf_destroy_chain);
+
+static void tcf_chain_destroy(struct tcf_chain *chain)
+{
+	list_del(&chain->list);
+	tcf_chain_flush(chain);
+	kfree(chain);
+}
+
+struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
+				bool create)
+{
+	struct tcf_chain *chain;
+
+	list_for_each_entry(chain, &block->chain_list, list) {
+		if (chain->index == chain_index) {
+			chain->refcnt++;
+			return chain;
+		}
+	}
+	if (create)
+		return tcf_chain_create(block, chain_index);
+	else
+		return NULL;
+}
+EXPORT_SYMBOL(tcf_chain_get);
+
+void tcf_chain_put(struct tcf_chain *chain)
+{
+	/* Destroy unused chain, with exception of chain 0, which is the
+	 * default one and has to be always present.
+	 */
+	if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0)
+		tcf_chain_destroy(chain);
+}
+EXPORT_SYMBOL(tcf_chain_put);
+
+static void
+tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
+			       struct tcf_proto __rcu **p_filter_chain)
+{
+	chain->p_filter_chain = p_filter_chain;
+}
+
+int tcf_block_get(struct tcf_block **p_block,
+		  struct tcf_proto __rcu **p_filter_chain)
+{
+	struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
+	struct tcf_chain *chain;
+	int err;
+
+	if (!block)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&block->chain_list);
+	/* Create chain 0 by default, it has to be always present. */
+	chain = tcf_chain_create(block, 0);
+	if (!chain) {
+		err = -ENOMEM;
+		goto err_chain_create;
+	}
+	tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
+	*p_block = block;
+	return 0;
+
+err_chain_create:
+	kfree(block);
+	return err;
+}
+EXPORT_SYMBOL(tcf_block_get);
+
+void tcf_block_put(struct tcf_block *block)
+{
+	struct tcf_chain *chain, *tmp;
+
+	if (!block)
+		return;
+
+	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+		tcf_chain_destroy(chain);
+	kfree(block);
+}
+EXPORT_SYMBOL(tcf_block_put);
+
+/* Main classifier routine: scans classifier chain attached
+ * to this qdisc, (optionally) tests for protocol and asks
+ * specific classifiers.
+ */
+int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+		 struct tcf_result *res, bool compat_mode)
+{
+	__be16 protocol = tc_skb_protocol(skb);
+#ifdef CONFIG_NET_CLS_ACT
+	const int max_reclassify_loop = 4;
+	const struct tcf_proto *orig_tp = tp;
+	const struct tcf_proto *first_tp;
+	int limit = 0;
+
+reclassify:
+#endif
+	for (; tp; tp = rcu_dereference_bh(tp->next)) {
+		int err;
+
+		if (tp->protocol != protocol &&
+		    tp->protocol != htons(ETH_P_ALL))
+			continue;
+
+		err = tp->classify(skb, tp, res);
+#ifdef CONFIG_NET_CLS_ACT
+		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
+			first_tp = orig_tp;
+			goto reset;
+		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
+			first_tp = res->goto_tp;
+			goto reset;
+		}
+#endif
+		if (err >= 0)
+			return err;
+	}
+
+	return TC_ACT_UNSPEC; /* signal: continue lookup */
+#ifdef CONFIG_NET_CLS_ACT
+reset:
+	if (unlikely(limit++ >= max_reclassify_loop)) {
+		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
+				       tp->q->ops->id, tp->prio & 0xffff,
+				       ntohs(tp->protocol));
+		return TC_ACT_SHOT;
+	}
+
+	tp = first_tp;
+	protocol = tc_skb_protocol(skb);
+	goto reclassify;
+#endif
+}
+EXPORT_SYMBOL(tcf_classify);
+
+struct tcf_chain_info {
+	struct tcf_proto __rcu **pprev;
+	struct tcf_proto __rcu *next;
+};
+
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+{
+	return rtnl_dereference(*chain_info->pprev);
+}
+
+static void tcf_chain_tp_insert(struct tcf_chain *chain,
+				struct tcf_chain_info *chain_info,
+				struct tcf_proto *tp)
+{
+	if (chain->p_filter_chain &&
+	    *chain_info->pprev == chain->filter_chain)
+		rcu_assign_pointer(*chain->p_filter_chain, tp);
+	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+	rcu_assign_pointer(*chain_info->pprev, tp);
+}
+
+static void tcf_chain_tp_remove(struct tcf_chain *chain,
+				struct tcf_chain_info *chain_info,
+				struct tcf_proto *tp)
+{
+	struct tcf_proto *next = rtnl_dereference(chain_info->next);
+
+	if (chain->p_filter_chain && tp == chain->filter_chain)
+		RCU_INIT_POINTER(*chain->p_filter_chain, next);
+	RCU_INIT_POINTER(*chain_info->pprev, next);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info,
+					   u32 protocol, u32 prio,
+					   bool prio_allocate)
+{
+	struct tcf_proto **pprev;
+	struct tcf_proto *tp;
+
+	/* Check the chain for existence of proto-tcf with this priority */
+	for (pprev = &chain->filter_chain;
+	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+		if (tp->prio >= prio) {
+			if (tp->prio == prio) {
+				if (prio_allocate ||
+				    (tp->protocol != protocol && protocol))
+					return ERR_PTR(-EINVAL);
+			} else {
+				tp = NULL;
+			}
+			break;
+		}
+	}
+	chain_info->pprev = pprev;
+	chain_info->next = tp ? tp->next : NULL;
+	return tp;
+}
 
 /* Add/change/delete/get a filter node */
 
@@ -206,13 +417,14 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	struct tcmsg *t;
 	u32 protocol;
 	u32 prio;
-	u32 nprio;
+	bool prio_allocate;
 	u32 parent;
+	u32 chain_index;
 	struct net_device *dev;
 	struct Qdisc  *q;
-	struct tcf_proto __rcu **back;
-	struct tcf_proto __rcu **chain;
-	struct tcf_proto *next;
+	struct tcf_chain_info chain_info;
+	struct tcf_chain *chain = NULL;
+	struct tcf_block *block;
 	struct tcf_proto *tp;
 	const struct Qdisc_class_ops *cops;
 	unsigned long cl;
@@ -234,7 +446,7 @@ replay:
 	t = nlmsg_data(n);
 	protocol = TC_H_MIN(t->tcm_info);
 	prio = TC_H_MAJ(t->tcm_info);
-	nprio = prio;
+	prio_allocate = false;
 	parent = t->tcm_parent;
 	cl = 0;
 
@@ -250,6 +462,7 @@ replay:
 			 */
 			if (n->nlmsg_flags & NLM_F_CREATE) {
 				prio = TC_H_MAKE(0x80000000U, 0U);
+				prio_allocate = true;
 				break;
 			}
 			/* fall-through */
@@ -280,7 +493,7 @@ replay:
 	if (!cops)
 		return -EINVAL;
 
-	if (cops->tcf_chain == NULL)
+	if (!cops->tcf_block)
 		return -EOPNOTSUPP;
 
 	/* Do we search for filter, attached to class? */
@@ -291,34 +504,36 @@ replay:
 	}
 
 	/* And the last stroke */
-	chain = cops->tcf_chain(q, cl);
-	if (chain == NULL) {
+	block = cops->tcf_block(q, cl);
+	if (!block) {
 		err = -EINVAL;
 		goto errout;
 	}
+
+	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+	if (chain_index > TC_ACT_EXT_VAL_MASK) {
+		err = -EINVAL;
+		goto errout;
+	}
+	chain = tcf_chain_get(block, chain_index,
+			      n->nlmsg_type == RTM_NEWTFILTER);
+	if (!chain) {
+		err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
+		goto errout;
+	}
+
 	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
 		tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
-		tcf_destroy_chain(chain);
+		tcf_chain_flush(chain);
 		err = 0;
 		goto errout;
 	}
 
-	/* Check the chain for existence of proto-tcf with this priority */
-	for (back = chain;
-	     (tp = rtnl_dereference(*back)) != NULL;
-	     back = &tp->next) {
-		if (tp->prio >= prio) {
-			if (tp->prio == prio) {
-				if (!nprio ||
-				    (tp->protocol != protocol && protocol)) {
-					err = -EINVAL;
-					goto errout;
-				}
-			} else {
-				tp = NULL;
-			}
-			break;
-		}
+	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
+			       prio, prio_allocate);
+	if (IS_ERR(tp)) {
+		err = PTR_ERR(tp);
+		goto errout;
 	}
 
 	if (tp == NULL) {
@@ -335,11 +550,11 @@ replay:
 			goto errout;
 		}
 
-		if (!nprio)
-			nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
+		if (prio_allocate)
+			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
 
 		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
-				      protocol, nprio, parent, q);
+				      protocol, prio, parent, q, chain);
 		if (IS_ERR(tp)) {
 			err = PTR_ERR(tp);
 			goto errout;
@@ -354,8 +569,7 @@ replay:
 
 	if (fh == 0) {
 		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
-			next = rtnl_dereference(tp->next);
-			RCU_INIT_POINTER(*back, next);
+			tcf_chain_tp_remove(chain, &chain_info, tp);
 			tfilter_notify(net, skb, n, tp, fh,
 				       RTM_DELTFILTER, false);
 			tcf_proto_destroy(tp);
@@ -384,11 +598,10 @@ replay:
 			err = tp->ops->delete(tp, fh, &last);
 			if (err)
 				goto errout;
-			next = rtnl_dereference(tp->next);
 			tfilter_notify(net, skb, n, tp, t->tcm_handle,
 				       RTM_DELTFILTER, false);
 			if (last) {
-				RCU_INIT_POINTER(*back, next);
+				tcf_chain_tp_remove(chain, &chain_info, tp);
 				tcf_proto_destroy(tp);
 			}
 			goto errout;
@@ -405,10 +618,8 @@ replay:
 	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
 			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
 	if (err == 0) {
-		if (tp_created) {
-			RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
-			rcu_assign_pointer(*back, tp);
-		}
+		if (tp_created)
+			tcf_chain_tp_insert(chain, &chain_info, tp);
 		tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
 	} else {
 		if (tp_created)
@@ -416,6 +627,8 @@ replay:
 	}
 
 errout:
+	if (chain)
+		tcf_chain_put(chain);
 	if (cl)
 		cops->put(q, cl);
 	if (err == -EAGAIN)
@@ -444,6 +657,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
 	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
 		goto nla_put_failure;
+	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
+		goto nla_put_failure;
 	tcm->tcm_handle = fh;
 	if (RTM_DELTFILTER != event) {
 		tcm->tcm_handle = 0;
@@ -500,22 +715,76 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
 			     RTM_NEWTFILTER);
 }
 
+static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
+			   struct netlink_callback *cb,
+			   long index_start, long *p_index)
+{
+	struct net *net = sock_net(skb->sk);
+	struct tcmsg *tcm = nlmsg_data(cb->nlh);
+	struct tcf_dump_args arg;
+	struct tcf_proto *tp;
+
+	for (tp = rtnl_dereference(chain->filter_chain);
+	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
+		if (*p_index < index_start)
+			continue;
+		if (TC_H_MAJ(tcm->tcm_info) &&
+		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
+			continue;
+		if (TC_H_MIN(tcm->tcm_info) &&
+		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
+			continue;
+		if (*p_index > index_start)
+			memset(&cb->args[1], 0,
+			       sizeof(cb->args) - sizeof(cb->args[0]));
+		if (cb->args[1] == 0) {
+			if (tcf_fill_node(net, skb, tp, 0,
+					  NETLINK_CB(cb->skb).portid,
+					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					  RTM_NEWTFILTER) <= 0)
+				return false;
+
+			cb->args[1] = 1;
+		}
+		if (!tp->ops->walk)
+			continue;
+		arg.w.fn = tcf_node_dump;
+		arg.skb = skb;
+		arg.cb = cb;
+		arg.w.stop = 0;
+		arg.w.skip = cb->args[1] - 1;
+		arg.w.count = 0;
+		tp->ops->walk(tp, &arg.w);
+		cb->args[1] = arg.w.count + 1;
+		if (arg.w.stop)
+			return false;
+	}
+	return true;
+}
+
 /* called with RTNL */
 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	int t;
-	int s_t;
+	struct nlattr *tca[TCA_MAX + 1];
 	struct net_device *dev;
 	struct Qdisc *q;
-	struct tcf_proto *tp, __rcu **chain;
+	struct tcf_block *block;
+	struct tcf_chain *chain;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
 	unsigned long cl = 0;
 	const struct Qdisc_class_ops *cops;
-	struct tcf_dump_args arg;
+	long index_start;
+	long index;
+	int err;
 
 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
 		return skb->len;
+
+	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+	if (err)
+		return err;
+
 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
 	if (!dev)
 		return skb->len;
@@ -529,56 +798,29 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	cops = q->ops->cl_ops;
 	if (!cops)
 		goto errout;
-	if (cops->tcf_chain == NULL)
+	if (!cops->tcf_block)
 		goto errout;
 	if (TC_H_MIN(tcm->tcm_parent)) {
 		cl = cops->get(q, tcm->tcm_parent);
 		if (cl == 0)
 			goto errout;
 	}
-	chain = cops->tcf_chain(q, cl);
-	if (chain == NULL)
+	block = cops->tcf_block(q, cl);
+	if (!block)
 		goto errout;
 
-	s_t = cb->args[0];
-
-	for (tp = rtnl_dereference(*chain), t = 0;
-	     tp; tp = rtnl_dereference(tp->next), t++) {
-		if (t < s_t)
-			continue;
-		if (TC_H_MAJ(tcm->tcm_info) &&
-		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
-			continue;
-		if (TC_H_MIN(tcm->tcm_info) &&
-		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
-			continue;
-		if (t > s_t)
-			memset(&cb->args[1], 0,
-			       sizeof(cb->args)-sizeof(cb->args[0]));
-		if (cb->args[1] == 0) {
-			if (tcf_fill_node(net, skb, tp, 0,
-					  NETLINK_CB(cb->skb).portid,
-					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					  RTM_NEWTFILTER) <= 0)
-				break;
+	index_start = cb->args[0];
+	index = 0;
 
-			cb->args[1] = 1;
-		}
-		if (tp->ops->walk == NULL)
+	list_for_each_entry(chain, &block->chain_list, list) {
+		if (tca[TCA_CHAIN] &&
+		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
 			continue;
-		arg.w.fn = tcf_node_dump;
-		arg.skb = skb;
-		arg.cb = cb;
-		arg.w.stop = 0;
-		arg.w.skip = cb->args[1] - 1;
-		arg.w.count = 0;
-		tp->ops->walk(tp, &arg.w);
-		cb->args[1] = arg.w.count + 1;
-		if (arg.w.stop)
+		if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
 			break;
 	}
 
-	cb->args[0] = t;
+	cb->args[0] = index;
 
 errout:
 	if (cl)
@@ -608,8 +850,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		struct tc_action *act;
 
 		if (exts->police && tb[exts->police]) {
-			act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
-						"police", ovr, TCA_ACT_BIND);
+			act = tcf_action_init_1(net, tp, tb[exts->police],
+						rate_tlv, "police", ovr,
+						TCA_ACT_BIND);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
@@ -620,8 +863,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 			LIST_HEAD(actions);
 			int err, i = 0;
 
-			err = tcf_action_init(net, tb[exts->action], rate_tlv,
-					      NULL, ovr, TCA_ACT_BIND,
+			err = tcf_action_init(net, tp, tb[exts->action],
+					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
 					      &actions);
 			if (err)
 				return err;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5ebeae996e63..a9c56ad4533a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -70,6 +70,7 @@ static int cls_bpf_exec_opcode(int code)
 	case TC_ACT_OK:
 	case TC_ACT_SHOT:
 	case TC_ACT_STOLEN:
+	case TC_ACT_TRAP:
 	case TC_ACT_REDIRECT:
 	case TC_ACT_UNSPEC:
 		return code;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index ca526c0881bd..33feaee197cf 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -49,6 +49,8 @@ struct fl_flow_key {
 	};
 	struct flow_dissector_key_ports enc_tp;
 	struct flow_dissector_key_mpls mpls;
+	struct flow_dissector_key_tcp tcp;
+	struct flow_dissector_key_ip ip;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -424,6 +426,12 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_MPLS_BOS]	= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_MPLS_TC]	= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_MPLS_LABEL]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_TCP_FLAGS]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_FLAGS_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_IP_TOS]		= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_IP_TOS_MASK]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_IP_TTL]		= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_IP_TTL_MASK]	= { .type = NLA_U8 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -525,6 +533,19 @@ static int fl_set_key_flags(struct nlattr **tb,
 	return 0;
 }
 
+static void fl_set_key_ip(struct nlattr **tb,
+			  struct flow_dissector_key_ip *key,
+			  struct flow_dissector_key_ip *mask)
+{
+		fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS,
+			       &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK,
+			       sizeof(key->tos));
+
+		fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL,
+			       &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK,
+			       sizeof(key->ttl));
+}
+
 static int fl_set_key(struct net *net, struct nlattr **tb,
 		      struct fl_flow_key *key, struct fl_flow_key *mask)
 {
@@ -567,6 +588,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
 			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
 			       sizeof(key->basic.ip_proto));
+		fl_set_key_ip(tb, &key->ip, &mask->ip);
 	}
 
 	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
@@ -596,6 +618,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
 			       &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
 			       sizeof(key->tp.dst));
+		fl_set_key_val(tb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
+			       &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
+			       sizeof(key->tcp.flags));
 	} else if (key->basic.ip_proto == IPPROTO_UDP) {
 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
 			       &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
@@ -767,6 +792,10 @@ static void fl_init_dissector(struct cls_fl_head *head,
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_PORTS, tp);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_IP, ip);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_TCP, tcp);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ICMP, icmp);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ARP, arp);
@@ -1074,6 +1103,19 @@ static int fl_dump_key_mpls(struct sk_buff *skb,
 	return 0;
 }
 
+static int fl_dump_key_ip(struct sk_buff *skb,
+			  struct flow_dissector_key_ip *key,
+			  struct flow_dissector_key_ip *mask)
+{
+	if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos,
+			    TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) ||
+	    fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl,
+			    TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl)))
+		return -1;
+
+	return 0;
+}
+
 static int fl_dump_key_vlan(struct sk_buff *skb,
 			    struct flow_dissector_key_vlan *vlan_key,
 			    struct flow_dissector_key_vlan *vlan_mask)
@@ -1187,9 +1229,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
 	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
-	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+	    (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
 			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
-			    sizeof(key->basic.ip_proto)))
+			    sizeof(key->basic.ip_proto)) ||
+	    fl_dump_key_ip(skb, &key->ip, &mask->ip)))
 		goto nla_put_failure;
 
 	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
@@ -1215,7 +1258,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 			     sizeof(key->tp.src)) ||
 	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
 			     &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
-			     sizeof(key->tp.dst))))
+			     sizeof(key->tp.dst)) ||
+	     fl_dump_key_val(skb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
+			     &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
+			     sizeof(key->tcp.flags))))
 		goto nla_put_failure;
 	else if (key->basic.ip_proto == IPPROTO_UDP &&
 		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e88342fde1bc..5d95401bbc02 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -163,7 +163,7 @@ int register_qdisc(struct Qdisc_ops *qops)
 		if (!(cops->get && cops->put && cops->walk && cops->leaf))
 			goto out_einval;
 
-		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
+		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
 			goto out_einval;
 	}
 
@@ -1878,54 +1878,6 @@ done:
 	return skb->len;
 }
 
-/* Main classifier routine: scans classifier chain attached
- * to this qdisc, (optionally) tests for protocol and asks
- * specific classifiers.
- */
-int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
-		struct tcf_result *res, bool compat_mode)
-{
-	__be16 protocol = tc_skb_protocol(skb);
-#ifdef CONFIG_NET_CLS_ACT
-	const int max_reclassify_loop = 4;
-	const struct tcf_proto *old_tp = tp;
-	int limit = 0;
-
-reclassify:
-#endif
-	for (; tp; tp = rcu_dereference_bh(tp->next)) {
-		int err;
-
-		if (tp->protocol != protocol &&
-		    tp->protocol != htons(ETH_P_ALL))
-			continue;
-
-		err = tp->classify(skb, tp, res);
-#ifdef CONFIG_NET_CLS_ACT
-		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
-			goto reset;
-#endif
-		if (err >= 0)
-			return err;
-	}
-
-	return TC_ACT_UNSPEC; /* signal: continue lookup */
-#ifdef CONFIG_NET_CLS_ACT
-reset:
-	if (unlikely(limit++ >= max_reclassify_loop)) {
-		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
-				       tp->q->ops->id, tp->prio & 0xffff,
-				       ntohs(tp->protocol));
-		return TC_ACT_SHOT;
-	}
-
-	tp = old_tp;
-	protocol = tc_skb_protocol(skb);
-	goto reclassify;
-#endif
-}
-EXPORT_SYMBOL(tc_classify);
-
 #ifdef CONFIG_PROC_FS
 static int psched_show(struct seq_file *seq, void *v)
 {
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 40cbceed4de8..de162592eee0 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -43,6 +43,7 @@
 struct atm_flow_data {
 	struct Qdisc		*q;	/* FIFO, TBF, etc. */
 	struct tcf_proto __rcu	*filter_list;
+	struct tcf_block	*block;
 	struct atm_vcc		*vcc;	/* VCC; NULL if VCC is closed */
 	void			(*old_pop)(struct atm_vcc *vcc,
 					   struct sk_buff *skb); /* chaining */
@@ -143,7 +144,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 	list_del_init(&flow->list);
 	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
 	qdisc_destroy(flow->q);
-	tcf_destroy_chain(&flow->filter_list);
+	tcf_block_put(flow->block);
 	if (flow->sock) {
 		pr_debug("atm_tc_put: f_count %ld\n",
 			file_count(flow->sock->file));
@@ -274,7 +275,13 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		error = -ENOBUFS;
 		goto err_out;
 	}
-	RCU_INIT_POINTER(flow->filter_list, NULL);
+
+	error = tcf_block_get(&flow->block, &flow->filter_list);
+	if (error) {
+		kfree(flow);
+		goto err_out;
+	}
+
 	flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
 	if (!flow->q)
 		flow->q = &noop_qdisc;
@@ -346,14 +353,13 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	}
 }
 
-static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,
-						unsigned long cl)
+static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
 
 	pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
-	return flow ? &flow->filter_list : &p->link.filter_list;
+	return flow ? flow->block : p->link.block;
 }
 
 /* --------------------------- Qdisc operations ---------------------------- */
@@ -377,7 +383,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		list_for_each_entry(flow, &p->flows, list) {
 			fl = rcu_dereference_bh(flow->filter_list);
 			if (fl) {
-				result = tc_classify(skb, fl, &res, true);
+				result = tcf_classify(skb, fl, &res, true);
 				if (result < 0)
 					continue;
 				flow = (struct atm_flow_data *)res.class;
@@ -400,6 +406,7 @@ done:
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
+		case TC_ACT_TRAP:
 			__qdisc_drop(skb, to_free);
 			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
@@ -524,6 +531,7 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
 static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
+	int err;
 
 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
 	INIT_LIST_HEAD(&p->flows);
@@ -534,7 +542,11 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!p->link.q)
 		p->link.q = &noop_qdisc;
 	pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
-	RCU_INIT_POINTER(p->link.filter_list, NULL);
+
+	err = tcf_block_get(&p->link.block, &p->link.filter_list);
+	if (err)
+		return err;
+
 	p->link.vcc = NULL;
 	p->link.sock = NULL;
 	p->link.classid = sch->handle;
@@ -561,7 +573,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 
 	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
 	list_for_each_entry(flow, &p->flows, list)
-		tcf_destroy_chain(&flow->filter_list);
+		tcf_block_put(flow->block);
 
 	list_for_each_entry_safe(flow, tmp, &p->flows, list) {
 		if (flow->ref > 1)
@@ -646,7 +658,7 @@ static const struct Qdisc_class_ops atm_class_ops = {
 	.change		= atm_tc_change,
 	.delete		= atm_tc_delete,
 	.walk		= atm_tc_walk,
-	.tcf_chain	= atm_tc_find_tcf,
+	.tcf_block	= atm_tc_tcf_block,
 	.bind_tcf	= atm_tc_bind_filter,
 	.unbind_tcf	= atm_tc_put,
 	.dump		= atm_tc_dump_class,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 7415859fd4c3..481036f6b54e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -127,6 +127,7 @@ struct cbq_class {
 	struct tc_cbq_xstats	xstats;
 
 	struct tcf_proto __rcu	*filter_list;
+	struct tcf_block	*block;
 
 	int			refcnt;
 	int			filters;
@@ -233,7 +234,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		/*
 		 * Step 2+n. Apply classifier.
 		 */
-		result = tc_classify(skb, fl, &res, true);
+		result = tcf_classify(skb, fl, &res, true);
 		if (!fl || result < 0)
 			goto fallback;
 
@@ -253,6 +254,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
+		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
@@ -1405,7 +1407,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 
 	WARN_ON(cl->filters);
 
-	tcf_destroy_chain(&cl->filter_list);
+	tcf_block_put(cl->block);
 	qdisc_destroy(cl->q);
 	qdisc_put_rtab(cl->R_tab);
 	gen_kill_estimator(&cl->rate_est);
@@ -1430,7 +1432,7 @@ static void cbq_destroy(struct Qdisc *sch)
 	 */
 	for (h = 0; h < q->clhash.hashsize; h++) {
 		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
-			tcf_destroy_chain(&cl->filter_list);
+			tcf_block_put(cl->block);
 	}
 	for (h = 0; h < q->clhash.hashsize; h++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
@@ -1585,12 +1587,19 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	if (cl == NULL)
 		goto failure;
 
+	err = tcf_block_get(&cl->block, &cl->filter_list);
+	if (err) {
+		kfree(cl);
+		return err;
+	}
+
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
 					NULL,
 					qdisc_root_sleeping_running(sch),
 					tca[TCA_RATE]);
 		if (err) {
+			tcf_block_put(cl->block);
 			kfree(cl);
 			goto failure;
 		}
@@ -1688,8 +1697,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 	return 0;
 }
 
-static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
-					     unsigned long arg)
+static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = (struct cbq_class *)arg;
@@ -1697,7 +1705,7 @@ static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
 	if (cl == NULL)
 		cl = &q->link;
 
-	return &cl->filter_list;
+	return cl->block;
 }
 
 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
@@ -1756,7 +1764,7 @@ static const struct Qdisc_class_ops cbq_class_ops = {
 	.change		=	cbq_change_class,
 	.delete		=	cbq_delete,
 	.walk		=	cbq_walk,
-	.tcf_chain	=	cbq_find_tcf,
+	.tcf_block	=	cbq_tcf_block,
 	.bind_tcf	=	cbq_bind_filter,
 	.unbind_tcf	=	cbq_unbind_filter,
 	.dump		=	cbq_dump_class,
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 58a8c32eab23..a413dc1c2098 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -36,6 +36,7 @@ struct drr_class {
 struct drr_sched {
 	struct list_head		active;
 	struct tcf_proto __rcu		*filter_list;
+	struct tcf_block		*block;
 	struct Qdisc_class_hash		clhash;
 };
 
@@ -190,15 +191,14 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg)
 		drr_destroy_class(sch, cl);
 }
 
-static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch,
-					      unsigned long cl)
+static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct drr_sched *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
 
-	return &q->filter_list;
+	return q->block;
 }
 
 static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
@@ -333,12 +333,13 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	fl = rcu_dereference_bh(q->filter_list);
-	result = tc_classify(skb, fl, &res, false);
+	result = tcf_classify(skb, fl, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
+		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
@@ -431,6 +432,9 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	struct drr_sched *q = qdisc_priv(sch);
 	int err;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
 	err = qdisc_class_hash_init(&q->clhash);
 	if (err < 0)
 		return err;
@@ -462,7 +466,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch)
 	struct hlist_node *next;
 	unsigned int i;
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -477,7 +481,7 @@ static const struct Qdisc_class_ops drr_class_ops = {
 	.delete		= drr_delete_class,
 	.get		= drr_get_class,
 	.put		= drr_put_class,
-	.tcf_chain	= drr_tcf_chain,
+	.tcf_block	= drr_tcf_block,
 	.bind_tcf	= drr_bind_tcf,
 	.unbind_tcf	= drr_unbind_tcf,
 	.graft		= drr_graft_class,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1c0f877f673a..6d94fcc3592a 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -44,6 +44,7 @@ struct mask_value {
 struct dsmark_qdisc_data {
 	struct Qdisc		*q;
 	struct tcf_proto __rcu	*filter_list;
+	struct tcf_block	*block;
 	struct mask_value	*mv;
 	u16			indices;
 	u8			set_tc_index;
@@ -183,11 +184,11 @@ ignore:
 	}
 }
 
-static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,
-						       unsigned long cl)
+static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	return &p->filter_list;
+
+	return p->block;
 }
 
 /* --------------------------- Qdisc operations ---------------------------- */
@@ -234,7 +235,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	else {
 		struct tcf_result res;
 		struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
-		int result = tc_classify(skb, fl, &res, false);
+		int result = tcf_classify(skb, fl, &res, false);
 
 		pr_debug("result %d class 0x%04x\n", result, res.classid);
 
@@ -242,6 +243,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 #ifdef CONFIG_NET_CLS_ACT
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
+		case TC_ACT_TRAP:
 			__qdisc_drop(skb, to_free);
 			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 
@@ -342,6 +344,10 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		goto errout;
 
+	err = tcf_block_get(&p->block, &p->filter_list);
+	if (err)
+		return err;
+
 	err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
 	if (err < 0)
 		goto errout;
@@ -400,7 +406,7 @@ static void dsmark_destroy(struct Qdisc *sch)
 
 	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
 
-	tcf_destroy_chain(&p->filter_list);
+	tcf_block_put(p->block);
 	qdisc_destroy(p->q);
 	if (p->mv != p->embedded)
 		kfree(p->mv);
@@ -468,7 +474,7 @@ static const struct Qdisc_class_ops dsmark_class_ops = {
 	.change		=	dsmark_change,
 	.delete		=	dsmark_delete,
 	.walk		=	dsmark_walk,
-	.tcf_chain	=	dsmark_find_tcf,
+	.tcf_block	=	dsmark_tcf_block,
 	.bind_tcf	=	dsmark_bind_filter,
 	.unbind_tcf	=	dsmark_put,
 	.dump		=	dsmark_dump_class,
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index b488721a0059..147fde73a0f5 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -390,9 +390,17 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		q->stat_tcp_retrans++;
 	qdisc_qstats_backlog_inc(sch, skb);
 	if (fq_flow_is_detached(f)) {
+		struct sock *sk = skb->sk;
+
 		fq_flow_add_tail(&q->new_flows, f);
 		if (time_after(jiffies, f->age + q->flow_refill_delay))
 			f->credit = max_t(u32, f->credit, q->quantum);
+		if (sk && q->rate_enable) {
+			if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
+				     SK_PACING_FQ))
+				smp_store_release(&sk->sk_pacing_status,
+						  SK_PACING_FQ);
+		}
 		q->inactive_flows--;
 	}
 
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9201abce928c..337f2d6d81e4 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -55,6 +55,7 @@ struct fq_codel_flow {
 
 struct fq_codel_sched_data {
 	struct tcf_proto __rcu *filter_list; /* optional external classifier */
+	struct tcf_block *block;
 	struct fq_codel_flow *flows;	/* Flows table [flows_cnt] */
 	u32		*backlogs;	/* backlog table [flows_cnt] */
 	u32		flows_cnt;	/* number of flows */
@@ -96,12 +97,13 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
 		return fq_codel_hash(q, skb) + 1;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	result = tc_classify(skb, filter, &res, false);
+	result = tcf_classify(skb, filter, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
+		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return 0;
@@ -450,7 +452,7 @@ static void fq_codel_destroy(struct Qdisc *sch)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	kvfree(q->backlogs);
 	kvfree(q->flows);
 }
@@ -459,6 +461,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 	int i;
+	int err;
 
 	sch->limit = 10*1024;
 	q->flows_cnt = 1024;
@@ -478,6 +481,10 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	if (!q->flows) {
 		q->flows = kvzalloc(q->flows_cnt *
 					   sizeof(struct fq_codel_flow), GFP_KERNEL);
@@ -589,14 +596,13 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl)
 {
 }
 
-static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch,
-						  unsigned long cl)
+static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -679,7 +685,7 @@ static const struct Qdisc_class_ops fq_codel_class_ops = {
 	.leaf		=	fq_codel_leaf,
 	.get		=	fq_codel_get,
 	.put		=	fq_codel_put,
-	.tcf_chain	=	fq_codel_find_tcf,
+	.tcf_block	=	fq_codel_tcf_block,
 	.bind_tcf	=	fq_codel_bind,
 	.unbind_tcf	=	fq_codel_put,
 	.dump		=	fq_codel_dump_class,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5cb82f6c1b06..b52f74610dc7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,6 +116,7 @@ struct hfsc_class {
 	struct gnet_stats_queue qstats;
 	struct net_rate_estimator __rcu *rate_est;
 	struct tcf_proto __rcu *filter_list; /* filter list */
+	struct tcf_block *block;
 	unsigned int	filter_cnt;	/* filter count */
 	unsigned int	level;		/* class level in hierarchy */
 
@@ -1040,12 +1041,19 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl == NULL)
 		return -ENOBUFS;
 
+	err = tcf_block_get(&cl->block, &cl->filter_list);
+	if (err) {
+		kfree(cl);
+		return err;
+	}
+
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
 					NULL,
 					qdisc_root_sleeping_running(sch),
 					tca[TCA_RATE]);
 		if (err) {
+			tcf_block_put(cl->block);
 			kfree(cl);
 			return err;
 		}
@@ -1091,7 +1099,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&cl->filter_list);
+	tcf_block_put(cl->block);
 	qdisc_destroy(cl->qdisc);
 	gen_kill_estimator(&cl->rate_est);
 	if (cl != &q->root)
@@ -1142,11 +1150,12 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	head = &q->root;
 	tcf = rcu_dereference_bh(q->root.filter_list);
-	while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
+	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
+		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
@@ -1261,8 +1270,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
 	cl->filter_cnt--;
 }
 
-static struct tcf_proto __rcu **
-hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
+static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
@@ -1270,7 +1278,7 @@ hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
 	if (cl == NULL)
 		cl = &q->root;
 
-	return &cl->filter_list;
+	return cl->block;
 }
 
 static int
@@ -1515,7 +1523,7 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
-			tcf_destroy_chain(&cl->filter_list);
+			tcf_block_put(cl->block);
 	}
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1662,7 +1670,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = {
 	.put		= hfsc_put_class,
 	.bind_tcf	= hfsc_bind_tcf,
 	.unbind_tcf	= hfsc_unbind_tcf,
-	.tcf_chain	= hfsc_tcf_chain,
+	.tcf_block	= hfsc_tcf_block,
 	.dump		= hfsc_dump_class,
 	.dump_stats	= hfsc_dump_class_stats,
 	.walk		= hfsc_walk
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 570ef3b0c09b..203286ab4427 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -105,6 +105,7 @@ struct htb_class {
 	int			quantum;	/* but stored for parent-to-leaf return */
 
 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
+	struct tcf_block	*block;
 	int			filter_cnt;
 	int			refcnt;		/* usage count of this class */
 
@@ -156,6 +157,7 @@ struct htb_sched {
 
 	/* filters for qdisc itself */
 	struct tcf_proto __rcu	*filter_list;
+	struct tcf_block	*block;
 
 #define HTB_WARN_TOOMANYEVENTS	0x1
 	unsigned int		warned;	/* only one warning */
@@ -231,11 +233,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 	}
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
+	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
+		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
@@ -1017,6 +1020,10 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		return -EINVAL;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
 	if (err < 0)
 		return err;
@@ -1230,7 +1237,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 		qdisc_destroy(cl->un.leaf.q);
 	}
 	gen_kill_estimator(&cl->rate_est);
-	tcf_destroy_chain(&cl->filter_list);
+	tcf_block_put(cl->block);
 	kfree(cl);
 }
 
@@ -1248,11 +1255,11 @@ static void htb_destroy(struct Qdisc *sch)
 	 * because filter need its target class alive to be able to call
 	 * unbind_filter on it (without Oops).
 	 */
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
-			tcf_destroy_chain(&cl->filter_list);
+			tcf_block_put(cl->block);
 	}
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1396,6 +1403,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if (!cl)
 			goto failure;
 
+		err = tcf_block_get(&cl->block, &cl->filter_list);
+		if (err) {
+			kfree(cl);
+			goto failure;
+		}
 		if (htb_rate_est || tca[TCA_RATE]) {
 			err = gen_new_estimator(&cl->bstats, NULL,
 						&cl->rate_est,
@@ -1403,6 +1415,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 						qdisc_root_sleeping_running(sch),
 						tca[TCA_RATE] ? : &est.nla);
 			if (err) {
+				tcf_block_put(cl->block);
 				kfree(cl);
 				goto failure;
 			}
@@ -1521,14 +1534,12 @@ failure:
 	return err;
 }
 
-static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch,
-					     unsigned long arg)
+static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
-	struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list;
 
-	return fl;
+	return cl ? cl->block : q->block;
 }
 
 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
@@ -1591,7 +1602,7 @@ static const struct Qdisc_class_ops htb_class_ops = {
 	.change		=	htb_change_class,
 	.delete		=	htb_delete,
 	.walk		=	htb_walk,
-	.tcf_chain	=	htb_find_tcf,
+	.tcf_block	=	htb_tcf_block,
 	.bind_tcf	=	htb_bind_filter,
 	.unbind_tcf	=	htb_unbind_filter,
 	.dump		=	htb_dump_class,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 3bab5f66c392..d8a9bebcab90 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -18,6 +18,10 @@
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 
+struct ingress_sched_data {
+	struct tcf_block *block;
+};
+
 static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
 {
 	return NULL;
@@ -47,16 +51,23 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 }
 
-static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
-						 unsigned long cl)
+static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	struct ingress_sched_data *q = qdisc_priv(sch);
 
-	return &dev->ingress_cl_list;
+	return q->block;
 }
 
 static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 {
+	struct ingress_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	int err;
+
+	err = tcf_block_get(&q->block, &dev->ingress_cl_list);
+	if (err)
+		return err;
+
 	net_inc_ingress_queue();
 	sch->flags |= TCQ_F_CPUSTATS;
 
@@ -65,9 +76,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 
 static void ingress_destroy(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	struct ingress_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&dev->ingress_cl_list);
+	tcf_block_put(q->block);
 	net_dec_ingress_queue();
 }
 
@@ -91,7 +102,7 @@ static const struct Qdisc_class_ops ingress_class_ops = {
 	.get		=	ingress_get,
 	.put		=	ingress_put,
 	.walk		=	ingress_walk,
-	.tcf_chain	=	ingress_find_tcf,
+	.tcf_block	=	ingress_tcf_block,
 	.tcf_cl_offload	=	ingress_cl_offload,
 	.bind_tcf	=	ingress_bind_filter,
 	.unbind_tcf	=	ingress_put,
@@ -100,12 +111,18 @@ static const struct Qdisc_class_ops ingress_class_ops = {
 static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
 	.cl_ops		=	&ingress_class_ops,
 	.id		=	"ingress",
+	.priv_size	=	sizeof(struct ingress_sched_data),
 	.init		=	ingress_init,
 	.destroy	=	ingress_destroy,
 	.dump		=	ingress_dump,
 	.owner		=	THIS_MODULE,
 };
 
+struct clsact_sched_data {
+	struct tcf_block *ingress_block;
+	struct tcf_block *egress_block;
+};
+
 static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
 {
 	switch (TC_H_MIN(classid)) {
@@ -128,16 +145,15 @@ static unsigned long clsact_bind_filter(struct Qdisc *sch,
 	return clsact_get(sch, classid);
 }
 
-static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch,
-						unsigned long cl)
+static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	struct clsact_sched_data *q = qdisc_priv(sch);
 
 	switch (cl) {
 	case TC_H_MIN(TC_H_MIN_INGRESS):
-		return &dev->ingress_cl_list;
+		return q->ingress_block;
 	case TC_H_MIN(TC_H_MIN_EGRESS):
-		return &dev->egress_cl_list;
+		return q->egress_block;
 	default:
 		return NULL;
 	}
@@ -145,6 +161,18 @@ static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch,
 
 static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 {
+	struct clsact_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	int err;
+
+	err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list);
+	if (err)
+		return err;
+
+	err = tcf_block_get(&q->egress_block, &dev->egress_cl_list);
+	if (err)
+		return err;
+
 	net_inc_ingress_queue();
 	net_inc_egress_queue();
 
@@ -155,10 +183,10 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 
 static void clsact_destroy(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	struct clsact_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&dev->ingress_cl_list);
-	tcf_destroy_chain(&dev->egress_cl_list);
+	tcf_block_put(q->egress_block);
+	tcf_block_put(q->ingress_block);
 
 	net_dec_ingress_queue();
 	net_dec_egress_queue();
@@ -169,7 +197,7 @@ static const struct Qdisc_class_ops clsact_class_ops = {
 	.get		=	clsact_get,
 	.put		=	ingress_put,
 	.walk		=	ingress_walk,
-	.tcf_chain	=	clsact_find_tcf,
+	.tcf_block	=	clsact_tcf_block,
 	.tcf_cl_offload	=	clsact_cl_offload,
 	.bind_tcf	=	clsact_bind_filter,
 	.unbind_tcf	=	ingress_put,
@@ -178,6 +206,7 @@ static const struct Qdisc_class_ops clsact_class_ops = {
 static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
 	.cl_ops		=	&clsact_class_ops,
 	.id		=	"clsact",
+	.priv_size	=	sizeof(struct clsact_sched_data),
 	.init		=	clsact_init,
 	.destroy	=	clsact_destroy,
 	.dump		=	ingress_dump,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 43a3a10b3c81..f143b7bbaa0d 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -32,6 +32,7 @@ struct multiq_sched_data {
 	u16 max_bands;
 	u16 curband;
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block *block;
 	struct Qdisc **queues;
 };
 
@@ -46,11 +47,12 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	int err;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	err = tc_classify(skb, fl, &res, false);
+	err = tcf_classify(skb, fl, &res, false);
 #ifdef CONFIG_NET_CLS_ACT
 	switch (err) {
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
+	case TC_ACT_TRAP:
 		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 	case TC_ACT_SHOT:
 		return NULL;
@@ -170,7 +172,7 @@ multiq_destroy(struct Qdisc *sch)
 	int band;
 	struct multiq_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	for (band = 0; band < q->bands; band++)
 		qdisc_destroy(q->queues[band]);
 
@@ -243,6 +245,10 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	q->max_bands = qdisc_dev(sch)->num_tx_queues;
 
 	q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
@@ -367,14 +373,13 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	}
 }
 
-static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch,
-						unsigned long cl)
+static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static const struct Qdisc_class_ops multiq_class_ops = {
@@ -383,7 +388,7 @@ static const struct Qdisc_class_ops multiq_class_ops = {
 	.get		=	multiq_get,
 	.put		=	multiq_put,
 	.walk		=	multiq_walk,
-	.tcf_chain	=	multiq_find_tcf,
+	.tcf_block	=	multiq_tcf_block,
 	.bind_tcf	=	multiq_bind,
 	.unbind_tcf	=	multiq_put,
 	.dump		=	multiq_dump_class,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 92c2e6d448d7..e3e364cc9a70 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -25,6 +25,7 @@
 struct prio_sched_data {
 	int bands;
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block *block;
 	u8  prio2band[TC_PRIO_MAX+1];
 	struct Qdisc *queues[TCQ_PRIO_BANDS];
 };
@@ -42,11 +43,12 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	if (TC_H_MAJ(skb->priority) != sch->handle) {
 		fl = rcu_dereference_bh(q->filter_list);
-		err = tc_classify(skb, fl, &res, false);
+		err = tcf_classify(skb, fl, &res, false);
 #ifdef CONFIG_NET_CLS_ACT
 		switch (err) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
+		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
@@ -145,7 +147,7 @@ prio_destroy(struct Qdisc *sch)
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	for (prio = 0; prio < q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
@@ -204,9 +206,16 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 
 static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 {
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int err;
+
 	if (!opt)
 		return -EINVAL;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	return prio_tune(sch, opt);
 }
 
@@ -317,14 +326,13 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 	}
 }
 
-static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch,
-					      unsigned long cl)
+static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static const struct Qdisc_class_ops prio_class_ops = {
@@ -333,7 +341,7 @@ static const struct Qdisc_class_ops prio_class_ops = {
 	.get		=	prio_get,
 	.put		=	prio_put,
 	.walk		=	prio_walk,
-	.tcf_chain	=	prio_find_tcf,
+	.tcf_block	=	prio_tcf_block,
 	.bind_tcf	=	prio_bind,
 	.unbind_tcf	=	prio_put,
 	.dump		=	prio_dump_class,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 041eba3006cc..0e16dfda0bd7 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -182,6 +182,7 @@ struct qfq_group {
 
 struct qfq_sched {
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block	*block;
 	struct Qdisc_class_hash clhash;
 
 	u64			oldV, V;	/* Precise virtual times. */
@@ -582,15 +583,14 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
 		qfq_destroy_class(sch, cl);
 }
 
-static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch,
-					      unsigned long cl)
+static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct qfq_sched *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
 
-	return &q->filter_list;
+	return q->block;
 }
 
 static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
@@ -720,12 +720,13 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	fl = rcu_dereference_bh(q->filter_list);
-	result = tc_classify(skb, fl, &res, false);
+	result = tcf_classify(skb, fl, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
+		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
@@ -1438,6 +1439,10 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	int i, j, err;
 	u32 max_cl_shift, maxbudg_shift, max_classes;
 
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
+
 	err = qdisc_class_hash_init(&q->clhash);
 	if (err < 0)
 		return err;
@@ -1492,7 +1497,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
 	struct hlist_node *next;
 	unsigned int i;
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1508,7 +1513,7 @@ static const struct Qdisc_class_ops qfq_class_ops = {
 	.delete		= qfq_delete_class,
 	.get		= qfq_get_class,
 	.put		= qfq_put_class,
-	.tcf_chain	= qfq_tcf_chain,
+	.tcf_block	= qfq_tcf_block,
 	.bind_tcf	= qfq_bind_tcf,
 	.unbind_tcf	= qfq_unbind_tcf,
 	.graft		= qfq_graft_class,
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 0f777273ba29..11fb6ec878d6 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -56,6 +56,7 @@ struct sfb_bins {
 struct sfb_sched_data {
 	struct Qdisc	*qdisc;
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block *block;
 	unsigned long	rehash_interval;
 	unsigned long	warmup_time;	/* double buffering warmup time in jiffies */
 	u32		max;
@@ -259,12 +260,13 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
 	struct tcf_result res;
 	int result;
 
-	result = tc_classify(skb, fl, &res, false);
+	result = tcf_classify(skb, fl, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
+		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return false;
@@ -465,7 +467,7 @@ static void sfb_destroy(struct Qdisc *sch)
 {
 	struct sfb_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	qdisc_destroy(q->qdisc);
 }
 
@@ -549,6 +551,11 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
 static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfb_sched_data *q = qdisc_priv(sch);
+	int err;
+
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
 
 	q->qdisc = &noop_qdisc;
 	return sfb_change(sch, opt);
@@ -657,14 +664,13 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	}
 }
 
-static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch,
-					     unsigned long cl)
+static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct sfb_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
@@ -682,7 +688,7 @@ static const struct Qdisc_class_ops sfb_class_ops = {
 	.change		=	sfb_change_class,
 	.delete		=	sfb_delete,
 	.walk		=	sfb_walk,
-	.tcf_chain	=	sfb_find_tcf,
+	.tcf_block	=	sfb_tcf_block,
 	.bind_tcf	=	sfb_bind,
 	.unbind_tcf	=	sfb_put,
 	.dump		=	sfb_dump_class,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 332d94be6e1c..f80ea2cc5f1f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -126,6 +126,7 @@ struct sfq_sched_data {
 	u8		flags;
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct tcf_proto __rcu *filter_list;
+	struct tcf_block *block;
 	sfq_index	*ht;		/* Hash table ('divisor' slots) */
 	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
 
@@ -180,12 +181,13 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		return sfq_hash(q, skb) + 1;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	result = tc_classify(skb, fl, &res, false);
+	result = tcf_classify(skb, fl, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
+		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return 0;
@@ -697,7 +699,7 @@ static void sfq_destroy(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 
-	tcf_destroy_chain(&q->filter_list);
+	tcf_block_put(q->block);
 	q->perturb_period = 0;
 	del_timer_sync(&q->perturb_timer);
 	sfq_free(q->ht);
@@ -709,6 +711,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	int i;
+	int err;
+
+	err = tcf_block_get(&q->block, &q->filter_list);
+	if (err)
+		return err;
 
 	setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
 			       (unsigned long)sch);
@@ -815,14 +822,13 @@ static void sfq_put(struct Qdisc *q, unsigned long cl)
 {
 }
 
-static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
-					     unsigned long cl)
+static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 
 	if (cl)
 		return NULL;
-	return &q->filter_list;
+	return q->block;
 }
 
 static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -878,7 +884,7 @@ static const struct Qdisc_class_ops sfq_class_ops = {
 	.leaf		=	sfq_leaf,
 	.get		=	sfq_get,
 	.put		=	sfq_put,
-	.tcf_chain	=	sfq_find_tcf,
+	.tcf_block	=	sfq_tcf_block,
 	.bind_tcf	=	sfq_bind,
 	.unbind_tcf	=	sfq_put,
 	.dump		=	sfq_dump_class,
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 95238284c422..288c5e0cda5d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -246,7 +246,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	if (!sctp_ulpq_init(&asoc->ulpq, asoc))
 		goto fail_init;
 
-	if (sctp_stream_new(asoc, gfp))
+	if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+			     0, gfp))
 		goto fail_init;
 
 	/* Assume that peer would support both address types unless we are
@@ -291,7 +292,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	return asoc;
 
 stream_free:
-	sctp_stream_free(asoc->stream);
+	sctp_stream_free(&asoc->stream);
 fail_init:
 	sock_put(asoc->base.sk);
 	sctp_endpoint_put(asoc->ep);
@@ -365,7 +366,7 @@ void sctp_association_free(struct sctp_association *asoc)
 	sctp_tsnmap_free(&asoc->peer.tsn_map);
 
 	/* Free stream information. */
-	sctp_stream_free(asoc->stream);
+	sctp_stream_free(&asoc->stream);
 
 	if (asoc->strreset_chunk)
 		sctp_chunk_free(asoc->strreset_chunk);
@@ -1151,7 +1152,7 @@ void sctp_assoc_update(struct sctp_association *asoc,
 		/* Reinitialize SSN for both local streams
 		 * and peer's streams.
 		 */
-		sctp_stream_clear(asoc->stream);
+		sctp_stream_clear(&asoc->stream);
 
 		/* Flush the ULP reassembly and ordered queue.
 		 * Any data there will now be stale and will
@@ -1177,11 +1178,8 @@ void sctp_assoc_update(struct sctp_association *asoc,
 		asoc->ctsn_ack_point = asoc->next_tsn - 1;
 		asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
 
-		if (sctp_state(asoc, COOKIE_WAIT)) {
-			sctp_stream_free(asoc->stream);
-			asoc->stream = new->stream;
-			new->stream = NULL;
-		}
+		if (sctp_state(asoc, COOKIE_WAIT))
+			sctp_stream_update(&asoc->stream, &new->stream);
 
 		if (!asoc->assoc_id) {
 			/* get a new association id since we don't have one
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 697721a7a3f1..81466f6442e8 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -307,7 +307,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 	if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
 	    time_after(jiffies, chunk->msg->expires_at)) {
 		struct sctp_stream_out *streamout =
-			&chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+			&chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
 
 		if (chunk->sent_count) {
 			chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
@@ -320,7 +320,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 	} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
 		   chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
 		struct sctp_stream_out *streamout =
-			&chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+			&chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
 
 		chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
 		streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 4f5a2b580aa5..275925b93b29 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -35,6 +35,7 @@
 static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
 {
 	skb->ip_summed = CHECKSUM_NONE;
+	skb->csum_not_inet = 0;
 	return sctp_compute_cksum(skb, skb_transport_offset(skb));
 }
 
@@ -98,6 +99,11 @@ static const struct net_offload sctp6_offload = {
 	},
 };
 
+static const struct skb_checksum_ops crc32c_csum_ops = {
+	.update  = sctp_csum_update,
+	.combine = sctp_csum_combine,
+};
+
 int __init sctp_offload_init(void)
 {
 	int ret;
@@ -110,6 +116,7 @@ int __init sctp_offload_init(void)
 	if (ret)
 		goto ipv4;
 
+	crc32c_csum_stub = &crc32c_csum_ops;
 	return ret;
 
 ipv4:
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1409a875ad8e..e2edf2ebbade 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -538,6 +538,7 @@ merge:
 	} else {
 chksum:
 		head->ip_summed = CHECKSUM_PARTIAL;
+		head->csum_not_inet = 1;
 		head->csum_start = skb_transport_header(head) - head->head;
 		head->csum_offset = offsetof(struct sctphdr, checksum);
 	}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index fe4c3d462f6e..20299df163b9 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -363,7 +363,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
 		sctp_insert_list(&asoc->outqueue.abandoned,
 				 &chk->transmitted_list);
 
-		streamout = &asoc->stream->out[chk->sinfo.sinfo_stream];
+		streamout = &asoc->stream.out[chk->sinfo.sinfo_stream];
 		asoc->sent_cnt_removable--;
 		asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
 		streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
@@ -400,9 +400,9 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
 		q->out_qlen -= chk->skb->len;
 		asoc->sent_cnt_removable--;
 		asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
-		if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) {
+		if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
 			struct sctp_stream_out *streamout =
-				&asoc->stream->out[chk->sinfo.sinfo_stream];
+				&asoc->stream.out[chk->sinfo.sinfo_stream];
 
 			streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
 		}
@@ -1036,7 +1036,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 			/* RFC 2960 6.5 Every DATA chunk MUST carry a valid
 			 * stream identifier.
 			 */
-			if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) {
+			if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) {
 
 				/* Mark as failed send. */
 				sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
@@ -1054,7 +1054,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 				continue;
 			}
 
-			if (asoc->stream->out[sid].state == SCTP_STREAM_CLOSED) {
+			if (asoc->stream.out[sid].state == SCTP_STREAM_CLOSED) {
 				sctp_outq_head_data(q, chunk);
 				goto sctp_flush_out;
 			}
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index a0b29d43627f..5a27d0f03df5 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 	sctp_seq_dump_remote_addrs(seq, assoc);
 	seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
 		   "%8d %8d %8d %8d",
-		assoc->hbinterval, assoc->stream->incnt,
-		assoc->stream->outcnt, assoc->max_retrans,
+		assoc->hbinterval, assoc->stream.incnt,
+		assoc->stream.outcnt, assoc->max_retrans,
 		assoc->init_retries, assoc->shutdown_retries,
 		assoc->rtx_data_chunks,
 		atomic_read(&sk->sk_wmem_alloc),
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 92e332e17391..bd439edf2d8a 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1544,7 +1544,7 @@ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk)
 
 	/* All fragments will be on the same stream */
 	sid = ntohs(chunk->subh.data_hdr->stream);
-	stream = chunk->asoc->stream;
+	stream = &chunk->asoc->stream;
 
 	/* Now assign the sequence number to the entire message.
 	 * All fragments must have the same stream sequence number.
@@ -2454,7 +2454,8 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 	 * stream sequence number shall be set to 0.
 	 */
 
-	if (sctp_stream_init(asoc, gfp))
+	if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+			     asoc->c.sinit_max_instreams, gfp))
 		goto clean_up;
 
 	if (!asoc->temp && sctp_assoc_set_id(asoc, gfp))
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f863b5573e42..df73190da761 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3958,7 +3958,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
 
 	/* Silently discard the chunk if stream-id is not valid */
 	sctp_walk_fwdtsn(skip, chunk) {
-		if (ntohs(skip->stream) >= asoc->stream->incnt)
+		if (ntohs(skip->stream) >= asoc->stream.incnt)
 			goto discard_noforce;
 	}
 
@@ -4029,7 +4029,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
 
 	/* Silently discard the chunk if stream-id is not valid */
 	sctp_walk_fwdtsn(skip, chunk) {
-		if (ntohs(skip->stream) >= asoc->stream->incnt)
+		if (ntohs(skip->stream) >= asoc->stream.incnt)
 			goto gen_shutdown;
 	}
 
@@ -6365,7 +6365,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * and discard the DATA chunk.
 	 */
 	sid = ntohs(data_hdr->stream);
-	if (sid >= asoc->stream->incnt) {
+	if (sid >= asoc->stream.incnt) {
 		/* Mark tsn as received even though we drop it */
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
 
@@ -6387,7 +6387,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * and is invalid.
 	 */
 	ssn = ntohs(data_hdr->ssn);
-	if (ordered && SSN_lt(ssn, sctp_ssn_peek(asoc->stream, in, sid)))
+	if (ordered && SSN_lt(ssn, sctp_ssn_peek(&asoc->stream, in, sid)))
 		return SCTP_IERROR_PROTO_VIOLATION;
 
 	/* Send the data up to the user.  Note:  Schedule  the
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f16c8d97b7f3..0822046e4f3f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	}
 
 	/* Check for invalid stream. */
-	if (sinfo->sinfo_stream >= asoc->stream->outcnt) {
+	if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
 		err = -EINVAL;
 		goto out_free;
 	}
@@ -4497,8 +4497,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
 	info->sctpi_rwnd = asoc->a_rwnd;
 	info->sctpi_unackdata = asoc->unack_data;
 	info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
-	info->sctpi_instrms = asoc->stream->incnt;
-	info->sctpi_outstrms = asoc->stream->outcnt;
+	info->sctpi_instrms = asoc->stream.incnt;
+	info->sctpi_outstrms = asoc->stream.outcnt;
 	list_for_each(pos, &asoc->base.inqueue.in_chunk_list)
 		info->sctpi_inqueue++;
 	list_for_each(pos, &asoc->outqueue.out_chunk_list)
@@ -4727,8 +4727,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
 	status.sstat_unackdata = asoc->unack_data;
 
 	status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
-	status.sstat_instrms = asoc->stream->incnt;
-	status.sstat_outstrms = asoc->stream->outcnt;
+	status.sstat_instrms = asoc->stream.incnt;
+	status.sstat_outstrms = asoc->stream.outcnt;
 	status.sstat_fragmentation_point = asoc->frag_point;
 	status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
 	memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr,
@@ -6600,10 +6600,10 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
-	if (!asoc || params.sprstat_sid >= asoc->stream->outcnt)
+	if (!asoc || params.sprstat_sid >= asoc->stream.outcnt)
 		goto out;
 
-	streamout = &asoc->stream->out[params.sprstat_sid];
+	streamout = &asoc->stream.out[params.sprstat_sid];
 	if (policy == SCTP_PR_SCTP_NONE) {
 		params.sprstat_abandoned_unsent = 0;
 		params.sprstat_abandoned_sent = 0;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index dda53a293986..82e6d40052a8 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -35,70 +35,43 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
-int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp)
+int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+		     gfp_t gfp)
 {
-	struct sctp_stream *stream;
-	int i;
-
-	stream = kzalloc(sizeof(*stream), gfp);
-	if (!stream)
-		return -ENOMEM;
-
-	stream->outcnt = asoc->c.sinit_num_ostreams;
-	stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
-	if (!stream->out) {
-		kfree(stream);
-		return -ENOMEM;
-	}
-	for (i = 0; i < stream->outcnt; i++)
-		stream->out[i].state = SCTP_STREAM_OPEN;
-
-	asoc->stream = stream;
-
-	return 0;
-}
-
-int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp)
-{
-	struct sctp_stream *stream = asoc->stream;
 	int i;
 
 	/* Initial stream->out size may be very big, so free it and alloc
 	 * a new one with new outcnt to save memory.
 	 */
 	kfree(stream->out);
-	stream->outcnt = asoc->c.sinit_num_ostreams;
-	stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
+
+	stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp);
 	if (!stream->out)
-		goto nomem;
+		return -ENOMEM;
 
+	stream->outcnt = outcnt;
 	for (i = 0; i < stream->outcnt; i++)
 		stream->out[i].state = SCTP_STREAM_OPEN;
 
-	stream->incnt = asoc->c.sinit_max_instreams;
-	stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp);
+	if (!incnt)
+		return 0;
+
+	stream->in = kcalloc(incnt, sizeof(*stream->in), gfp);
 	if (!stream->in) {
 		kfree(stream->out);
-		goto nomem;
+		stream->out = NULL;
+		return -ENOMEM;
 	}
 
-	return 0;
-
-nomem:
-	asoc->stream = NULL;
-	kfree(stream);
+	stream->incnt = incnt;
 
-	return -ENOMEM;
+	return 0;
 }
 
 void sctp_stream_free(struct sctp_stream *stream)
 {
-	if (unlikely(!stream))
-		return;
-
 	kfree(stream->out);
 	kfree(stream->in);
-	kfree(stream);
 }
 
 void sctp_stream_clear(struct sctp_stream *stream)
@@ -112,6 +85,19 @@ void sctp_stream_clear(struct sctp_stream *stream)
 		stream->in[i].ssn = 0;
 }
 
+void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
+{
+	sctp_stream_free(stream);
+
+	stream->out = new->out;
+	stream->in  = new->in;
+	stream->outcnt = new->outcnt;
+	stream->incnt  = new->incnt;
+
+	new->out = NULL;
+	new->in  = NULL;
+}
+
 static int sctp_send_reconf(struct sctp_association *asoc,
 			    struct sctp_chunk *chunk)
 {
@@ -128,7 +114,7 @@ static int sctp_send_reconf(struct sctp_association *asoc,
 int sctp_send_reset_streams(struct sctp_association *asoc,
 			    struct sctp_reset_streams *params)
 {
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u16 i, str_nums, *str_list;
 	struct sctp_chunk *chunk;
 	int retval = -EINVAL;
@@ -214,6 +200,7 @@ out:
 
 int sctp_send_reset_assoc(struct sctp_association *asoc)
 {
+	struct sctp_stream *stream = &asoc->stream;
 	struct sctp_chunk *chunk = NULL;
 	int retval;
 	__u16 i;
@@ -230,8 +217,8 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
 		return -ENOMEM;
 
 	/* Block further xmit of data until this request is completed */
-	for (i = 0; i < asoc->stream->outcnt; i++)
-		asoc->stream->out[i].state = SCTP_STREAM_CLOSED;
+	for (i = 0; i < stream->outcnt; i++)
+		stream->out[i].state = SCTP_STREAM_CLOSED;
 
 	asoc->strreset_chunk = chunk;
 	sctp_chunk_hold(asoc->strreset_chunk);
@@ -241,8 +228,8 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
 		sctp_chunk_put(asoc->strreset_chunk);
 		asoc->strreset_chunk = NULL;
 
-		for (i = 0; i < asoc->stream->outcnt; i++)
-			asoc->stream->out[i].state = SCTP_STREAM_OPEN;
+		for (i = 0; i < stream->outcnt; i++)
+			stream->out[i].state = SCTP_STREAM_OPEN;
 
 		return retval;
 	}
@@ -255,7 +242,7 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
 int sctp_send_add_streams(struct sctp_association *asoc,
 			  struct sctp_add_streams *params)
 {
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	struct sctp_chunk *chunk = NULL;
 	int retval = -ENOMEM;
 	__u32 outcnt, incnt;
@@ -357,7 +344,7 @@ struct sctp_chunk *sctp_process_strreset_outreq(
 				struct sctp_ulpevent **evp)
 {
 	struct sctp_strreset_outreq *outreq = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u16 i, nums, flags = 0, *str_p = NULL;
 	__u32 result = SCTP_STRRESET_DENIED;
 	__u32 request_seq;
@@ -449,7 +436,7 @@ struct sctp_chunk *sctp_process_strreset_inreq(
 				struct sctp_ulpevent **evp)
 {
 	struct sctp_strreset_inreq *inreq = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
 	struct sctp_chunk *chunk = NULL;
 	__u16 i, nums, *str_p;
@@ -523,7 +510,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
 {
 	__u32 init_tsn = 0, next_tsn = 0, max_tsn_seen;
 	struct sctp_strreset_tsnreq *tsnreq = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
 	__u32 request_seq;
 	__u16 i;
@@ -612,7 +599,7 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
 				struct sctp_ulpevent **evp)
 {
 	struct sctp_strreset_addstrm *addstrm = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
 	struct sctp_stream_in *streamin;
 	__u32 request_seq, incnt;
@@ -687,7 +674,7 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
 				struct sctp_ulpevent **evp)
 {
 	struct sctp_strreset_addstrm *addstrm = param.v;
-	struct sctp_stream *stream = asoc->stream;
+	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
 	struct sctp_stream_out *streamout;
 	struct sctp_chunk *chunk = NULL;
@@ -758,8 +745,8 @@ struct sctp_chunk *sctp_process_strreset_resp(
 				union sctp_params param,
 				struct sctp_ulpevent **evp)
 {
+	struct sctp_stream *stream = &asoc->stream;
 	struct sctp_strreset_resp *resp = param.v;
-	struct sctp_stream *stream = asoc->stream;
 	struct sctp_transport *t;
 	__u16 i, nums, flags = 0;
 	sctp_paramhdr_t *req;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index aa3624d50278..25f7e4140566 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -764,7 +764,7 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
 	__u16 sid, csid, cssn;
 
 	sid = event->stream;
-	stream  = ulpq->asoc->stream;
+	stream  = &ulpq->asoc->stream;
 
 	event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev;
 
@@ -858,7 +858,7 @@ static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
 	/* Note: The stream ID must be verified before this routine.  */
 	sid = event->stream;
 	ssn = event->ssn;
-	stream  = ulpq->asoc->stream;
+	stream  = &ulpq->asoc->stream;
 
 	/* Is this the expected SSN for this stream ID?  */
 	if (ssn != sctp_ssn_peek(stream, in, sid)) {
@@ -893,7 +893,7 @@ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid)
 	struct sk_buff_head *lobby = &ulpq->lobby;
 	__u16 csid, cssn;
 
-	stream = ulpq->asoc->stream;
+	stream = &ulpq->asoc->stream;
 
 	/* We are holding the chunks by stream, by SSN.  */
 	skb_queue_head_init(&temp);
@@ -958,7 +958,7 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn)
 	struct sctp_stream *stream;
 
 	/* Note: The stream ID must be verified before this routine.  */
-	stream  = ulpq->asoc->stream;
+	stream  = &ulpq->asoc->stream;
 
 	/* Is this an old SSN?  If so ignore. */
 	if (SSN_lt(ssn, sctp_ssn_peek(stream, in, sid)))
diff --git a/net/socket.c b/net/socket.c
index c2564eb25c6b..8f9dab330d57 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -461,7 +461,7 @@ EXPORT_SYMBOL(sock_from_file);
  *	@err: pointer to an error code return
  *
  *	The file handle passed in is locked and the socket it is bound
- *	too is returned. If an error occurs the err pointer is overwritten
+ *	to is returned. If an error occurs the err pointer is overwritten
  *	with a negative errno code and NULL is returned. The function checks
  *	for both invalid handles and passing a handle which is not a socket.
  *
@@ -662,6 +662,40 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
 	return skb->pkt_type == PACKET_OUTGOING;
 }
 
+/* On transmit, software and hardware timestamps are returned independently.
+ * As the two skb clones share the hardware timestamp, which may be updated
+ * before the software timestamp is received, a hardware TX timestamp may be
+ * returned only if there is no software TX timestamp. Ignore false software
+ * timestamps, which may be made in the __sock_recv_timestamp() call when the
+ * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
+ * hardware timestamp.
+ */
+static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
+{
+	return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
+}
+
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+	struct scm_ts_pktinfo ts_pktinfo;
+	struct net_device *orig_dev;
+
+	if (!skb_mac_header_was_set(skb))
+		return;
+
+	memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
+
+	rcu_read_lock();
+	orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+	if (orig_dev)
+		ts_pktinfo.if_index = orig_dev->ifindex;
+	rcu_read_unlock();
+
+	ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
+	put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
+		 sizeof(ts_pktinfo), &ts_pktinfo);
+}
+
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
@@ -670,14 +704,16 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 {
 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
 	struct scm_timestamping tss;
-	int empty = 1;
+	int empty = 1, false_tstamp = 0;
 	struct skb_shared_hwtstamps *shhwtstamps =
 		skb_hwtstamps(skb);
 
 	/* Race occurred between timestamp enabling and packet
 	   receiving.  Fill in the current time for now. */
-	if (need_software_tstamp && skb->tstamp == 0)
+	if (need_software_tstamp && skb->tstamp == 0) {
 		__net_timestamp(skb);
+		false_tstamp = 1;
+	}
 
 	if (need_software_tstamp) {
 		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
@@ -699,8 +735,13 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 		empty = 0;
 	if (shhwtstamps &&
 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
-	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
+	    !skb_is_swtx_tstamp(skb, false_tstamp) &&
+	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
 		empty = 0;
+		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+		    !skb_is_err_queue(skb))
+			put_ts_pktinfo(msg, skb);
+	}
 	if (!empty) {
 		put_cmsg(msg, SOL_SOCKET,
 			 SCM_TIMESTAMPING, sizeof(tss), &tss);