Diffstat (limited to 'net/sched')
47 files changed, 1670 insertions, 557 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e95741388311..1b9afdee5ba9 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -194,6 +194,17 @@ config NET_SCH_ETF To compile this code as a module, choose M here: the module will be called sch_etf. +config NET_SCH_TAPRIO + tristate "Time Aware Priority (taprio) Scheduler" + help + Say Y here if you want to use the Time Aware Priority (taprio) packet + scheduling algorithm. + + See the top of <file:net/sched/sch_taprio.c> for more details. + + To compile this code as a module, choose M here: the + module will be called sch_taprio. + config NET_SCH_GRED tristate "Generic Random Early Detection (GRED)" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index f0403f49edcb..8a40431d7b5c 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o +obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index e12f8ef7baa4..9c1b0729aebf 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -81,6 +81,7 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, static void free_tcf(struct tc_action *p) { free_percpu(p->cpu_bstats); + free_percpu(p->cpu_bstats_hw); free_percpu(p->cpu_qstats); tcf_set_action_cookie(&p->act_cookie, NULL); @@ -103,11 +104,11 @@ static int __tcf_action_put(struct tc_action *p, bool bind) { struct tcf_idrinfo *idrinfo = p->idrinfo; - if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) { + if (refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) { if (bind) atomic_dec(&p->tcfa_bindcnt); idr_remove(&idrinfo->action_idr, p->tcfa_index); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); return 1; @@ -199,7 +200,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct tc_action *p; unsigned long id = 1; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); s_i = cb->args[0]; @@ -234,7 +235,7 @@ done: if (index >= 0) cb->args[0] = index + 1; - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); if (n_i) { if (act_flags & TCA_FLAG_LARGE_DUMP_ON) cb->args[1] = n_i; @@ -246,6 +247,20 @@ nla_put_failure: goto done; } +static int tcf_idr_release_unsafe(struct tc_action *p) +{ + if (atomic_read(&p->tcfa_bindcnt) > 0) + return -EPERM; + + if (refcount_dec_and_test(&p->tcfa_refcnt)) { + idr_remove(&p->idrinfo->action_idr, p->tcfa_index); + tcf_action_cleanup(p); + return ACT_P_DELETED; + } + + return 0; +} + static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, const struct tc_action_ops *ops) { @@ -262,15 +277,19 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; + mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, id) { - ret = __tcf_idr_release(p, false, true); + ret = tcf_idr_release_unsafe(p); if (ret == ACT_P_DELETED) { module_put(ops->owner); n_i++; } else if (ret < 0) { + mutex_unlock(&idrinfo->lock); goto nla_put_failure; } } + mutex_unlock(&idrinfo->lock); + if (nla_put_u32(skb, TCA_FCNT, n_i)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -305,13 +324,13 @@ int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) struct tcf_idrinfo 
*idrinfo = tn->idrinfo; struct tc_action *p; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (IS_ERR(p)) p = NULL; else if (p) refcount_inc(&p->tcfa_refcnt); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); if (p) { *a = p; @@ -326,10 +345,10 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) struct tc_action *p; int ret = 0; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (!p) { - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); return -ENOENT; } @@ -339,7 +358,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) WARN_ON(p != idr_remove(&idrinfo->action_idr, p->tcfa_index)); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); module_put(owner); @@ -350,7 +369,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) ret = -EPERM; } - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); return ret; } @@ -372,9 +391,12 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); if (!p->cpu_bstats) goto err1; + p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + if (!p->cpu_bstats_hw) + goto err2; p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); if (!p->cpu_qstats) - goto err2; + goto err3; } spin_lock_init(&p->tcfa_lock); p->tcfa_index = index; @@ -386,15 +408,17 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, &p->tcfa_rate_est, &p->tcfa_lock, NULL, est); if (err) - goto err3; + goto err4; } p->idrinfo = idrinfo; p->ops = ops; *a = p; return 0; -err3: +err4: free_percpu(p->cpu_qstats); +err3: + free_percpu(p->cpu_bstats_hw); err2: free_percpu(p->cpu_bstats); err1: @@ -407,10 +431,10 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) { struct tcf_idrinfo *idrinfo = tn->idrinfo; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index))); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); } EXPORT_SYMBOL(tcf_idr_insert); @@ -420,10 +444,10 @@ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); /* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index))); - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); } EXPORT_SYMBOL(tcf_idr_cleanup); @@ -441,14 +465,14 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, int ret; again: - spin_lock(&idrinfo->lock); + mutex_lock(&idrinfo->lock); if (*index) { p = idr_find(&idrinfo->action_idr, *index); if (IS_ERR(p)) { /* This means that another process allocated * index but did not assign the pointer yet. 
*/ - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); goto again; } @@ -461,7 +485,7 @@ again: } else { *a = NULL; ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, - *index, GFP_ATOMIC); + *index, GFP_KERNEL); if (!ret) idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), *index); @@ -470,12 +494,12 @@ again: *index = 1; *a = NULL; ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, - UINT_MAX, GFP_ATOMIC); + UINT_MAX, GFP_KERNEL); if (!ret) idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), *index); } - spin_unlock(&idrinfo->lock); + mutex_unlock(&idrinfo->lock); return ret; } EXPORT_SYMBOL(tcf_idr_check_alloc); @@ -979,6 +1003,8 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, goto errout; if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 || + gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw, + &p->tcfa_bstats_hw) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, &p->tcfa_qstats, @@ -1073,12 +1099,14 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, err = -EINVAL; ops = tc_lookup_action(tb[TCA_ACT_KIND]); if (!ops) { /* could happen in batch of actions */ - NL_SET_ERR_MSG(extack, "Specified TC action not found"); + NL_SET_ERR_MSG(extack, "Specified TC action kind not found"); goto err_out; } err = -ENOENT; - if (ops->lookup(net, &a, index, extack) == 0) + if (ops->lookup(net, &a, index) == 0) { + NL_SET_ERR_MSG(extack, "TC action with specified index not found"); goto err_mod; + } module_put(ops->owner); return a; @@ -1424,7 +1452,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) u32 act_count = 0; ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, - tcaa_policy, NULL); + tcaa_policy, cb->extack); if (ret < 0) return ret; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 0c68bc9cf0b4..c7633843e223 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -387,8 +387,7 @@ static int tcf_bpf_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, bpf_net_id); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 6f0f273f1139..8475913f2070 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -143,8 +143,10 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, return -EEXIST; } /* replacing action and zone */ + spin_lock_bh(&ci->tcf_lock); ci->tcf_action = parm->action; ci->zone = parm->zone; + spin_unlock_bh(&ci->tcf_lock); ret = 0; } @@ -156,16 +158,16 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_connmark_info *ci = to_connmark(a); - struct tc_connmark opt = { .index = ci->tcf_index, .refcnt = refcount_read(&ci->tcf_refcnt) - ref, .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, - .action = ci->tcf_action, - .zone = ci->zone, }; struct tcf_t t; + spin_lock_bh(&ci->tcf_lock); + opt.action = ci->tcf_action; + opt.zone = ci->zone; if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -173,9 +175,12 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t, 
TCA_CONNMARK_PAD)) goto nla_put_failure; + spin_unlock_bh(&ci->tcf_lock); return skb->len; + nla_put_failure: + spin_unlock_bh(&ci->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -190,8 +195,7 @@ static int tcf_connmark_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, connmark_net_id); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b8a67ae3105a..3dc25b7806d7 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -646,8 +646,7 @@ static int tcf_csum_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, csum_net_id); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index cd1d9bd32ef9..b61c20ebb314 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -88,6 +88,11 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, p_parm = nla_data(tb[TCA_GACT_PROB]); if (p_parm->ptype >= MAX_RAND) return -EINVAL; + if (TC_ACT_EXT_CMP(p_parm->paction, TC_ACT_GOTO_CHAIN)) { + NL_SET_ERR_MSG(extack, + "goto chain not allowed on fallback"); + return -EINVAL; + } } #endif @@ -157,7 +162,7 @@ static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a, } static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, - u64 lastuse) + u64 lastuse, bool hw) { struct tcf_gact *gact = to_gact(a); int action = READ_ONCE(gact->tcf_action); @@ -168,6 +173,10 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; + if (hw) + _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats_hw), + bytes, packets); + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } @@ -222,8 +231,7 @@ static int tcf_gact_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, gact_net_id); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 06a3d4801878..30b63fa23ee2 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -855,8 +855,7 @@ static int tcf_ife_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ife_net_id); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 23273b5303fd..8af6c11d2482 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -135,7 +135,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if 
(nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) { if (exists) tcf_idr_release(*a, bind); else @@ -329,8 +329,7 @@ static int tcf_ipt_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ipt_net_id); @@ -379,8 +378,7 @@ static int tcf_xt_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, xt_net_id); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8bf66d0a6800..1dae5f2b358f 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -283,12 +283,15 @@ out: } static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, - u64 lastuse) + u64 lastuse, bool hw) { struct tcf_mirred *m = to_mirred(a); struct tcf_t *tm = &m->tcf_tm; _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + if (hw) + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } @@ -338,8 +341,7 @@ static int tcf_mirred_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, mirred_net_id); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 4313aa102440..c5c1e23add77 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -256,28 +256,31 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, unsigned char *b = skb_tail_pointer(skb); struct tcf_nat *p = to_tcf_nat(a); struct tc_nat opt = { - .old_addr = p->old_addr, - .new_addr = p->new_addr, - .mask = p->mask, - .flags = p->flags, - .index = p->tcf_index, - .action = p->tcf_action, .refcnt = refcount_read(&p->tcf_refcnt) - ref, .bindcnt = atomic_read(&p->tcf_bindcnt) - bind, }; struct tcf_t t; + spin_lock_bh(&p->tcf_lock); + opt.old_addr = p->old_addr; + opt.new_addr = p->new_addr; + opt.mask = p->mask; + opt.flags = p->flags; + opt.action = p->tcf_action; + if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD)) goto nla_put_failure; + spin_unlock_bh(&p->tcf_lock); return skb->len; nla_put_failure: + spin_unlock_bh(&p->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -292,8 +295,7 @@ static int tcf_nat_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, nat_net_id); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index ad99a99f11f6..da3dd0f68cc2 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -460,8 +460,7 @@ static int tcf_pedit_walker(struct net *net, struct 
sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, pedit_net_id); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 5d8bfa878477..052855d47354 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,8 +22,7 @@ #include <net/act_api.h> #include <net/netlink.h> -struct tcf_police { - struct tc_action common; +struct tcf_police_params { int tcfp_result; u32 tcfp_ewma_rate; s64 tcfp_burst; @@ -36,6 +35,12 @@ struct tcf_police { bool rate_present; struct psched_ratecfg peak; bool peak_present; + struct rcu_head rcu; +}; + +struct tcf_police { + struct tc_action common; + struct tcf_police_params __rcu *params; }; #define to_police(pc) ((struct tcf_police *)pc) @@ -84,6 +89,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; struct tc_action_net *tn = net_generic(net, police_net_id); + struct tcf_police_params *new; bool exists = false; int size; @@ -110,7 +116,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, parm->index, NULL, a, - &act_police_ops, bind, false); + &act_police_ops, bind, true); if (ret) { tcf_idr_cleanup(tn, parm->index); return ret; @@ -137,7 +143,8 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } if (est) { - err = gen_replace_estimator(&police->tcf_bstats, NULL, + err = gen_replace_estimator(&police->tcf_bstats, + police->common.cpu_bstats, &police->tcf_rate_est, &police->tcf_lock, NULL, est); @@ -150,50 +157,68 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, goto failure; } - spin_lock_bh(&police->tcf_lock); + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (unlikely(!new)) { + err = -ENOMEM; + goto failure; + } + /* No failure allowed after this point */ - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) { - police->tcfp_mtu = ~0; + new->tcfp_mtu = parm->mtu; + if (!new->tcfp_mtu) { + new->tcfp_mtu = ~0; if (R_tab) - police->tcfp_mtu = 255 << R_tab->rate.cell_log; + new->tcfp_mtu = 255 << R_tab->rate.cell_log; } if (R_tab) { - police->rate_present = true; - psched_ratecfg_precompute(&police->rate, &R_tab->rate, 0); + new->rate_present = true; + psched_ratecfg_precompute(&new->rate, &R_tab->rate, 0); qdisc_put_rtab(R_tab); } else { - police->rate_present = false; + new->rate_present = false; } if (P_tab) { - police->peak_present = true; - psched_ratecfg_precompute(&police->peak, &P_tab->rate, 0); + new->peak_present = true; + psched_ratecfg_precompute(&new->peak, &P_tab->rate, 0); qdisc_put_rtab(P_tab); } else { - police->peak_present = false; + new->peak_present = false; } - if (tb[TCA_POLICE_RESULT]) - police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); - police->tcfp_burst = PSCHED_TICKS2NS(parm->burst); - police->tcfp_toks = police->tcfp_burst; - if (police->peak_present) { - police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak, - police->tcfp_mtu); - police->tcfp_ptoks = police->tcfp_mtu_ptoks; + new->tcfp_burst = PSCHED_TICKS2NS(parm->burst); + new->tcfp_toks = new->tcfp_burst; + if (new->peak_present) { + new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak, + new->tcfp_mtu); + new->tcfp_ptoks = new->tcfp_mtu_ptoks; } - police->tcf_action = parm->action; if 
(tb[TCA_POLICE_AVRATE]) - police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); + new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); + + if (tb[TCA_POLICE_RESULT]) { + new->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]); + if (TC_ACT_EXT_CMP(new->tcfp_result, TC_ACT_GOTO_CHAIN)) { + NL_SET_ERR_MSG(extack, + "goto chain not allowed on fallback"); + err = -EINVAL; + goto failure; + } + } + spin_lock_bh(&police->tcf_lock); + new->tcfp_t_c = ktime_get_ns(); + police->tcf_action = parm->action; + rcu_swap_protected(police->params, + new, + lockdep_is_held(&police->tcf_lock)); spin_unlock_bh(&police->tcf_lock); - if (ret != ACT_P_CREATED) - return ret; - police->tcfp_t_c = ktime_get_ns(); - tcf_idr_insert(tn, *a); + if (new) + kfree_rcu(new, rcu); + if (ret == ACT_P_CREATED) + tcf_idr_insert(tn, *a); return ret; failure: @@ -207,64 +232,69 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_police *police = to_police(a); - s64 now; - s64 toks; - s64 ptoks = 0; + struct tcf_police_params *p; + s64 now, toks, ptoks = 0; + int ret; - spin_lock(&police->tcf_lock); - - bstats_update(&police->tcf_bstats, skb); tcf_lastuse_update(&police->tcf_tm); + bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb); + + ret = READ_ONCE(police->tcf_action); + p = rcu_dereference_bh(police->params); - if (police->tcfp_ewma_rate) { + if (p->tcfp_ewma_rate) { struct gnet_stats_rate_est64 sample; if (!gen_estimator_read(&police->tcf_rate_est, &sample) || - sample.bps >= police->tcfp_ewma_rate) { - police->tcf_qstats.overlimits++; - if (police->tcf_action == TC_ACT_SHOT) - police->tcf_qstats.drops++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; - } + sample.bps >= p->tcfp_ewma_rate) + goto inc_overlimits; } - if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { - if (!police->rate_present) { - spin_unlock(&police->tcf_lock); - return police->tcfp_result; + if (qdisc_pkt_len(skb) <= p->tcfp_mtu) { + if (!p->rate_present) { + ret = p->tcfp_result; + goto end; } now = ktime_get_ns(); - toks = min_t(s64, now - police->tcfp_t_c, - police->tcfp_burst); - if (police->peak_present) { - ptoks = toks + police->tcfp_ptoks; - if (ptoks > police->tcfp_mtu_ptoks) - ptoks = police->tcfp_mtu_ptoks; - ptoks -= (s64) psched_l2t_ns(&police->peak, - qdisc_pkt_len(skb)); + toks = min_t(s64, now - p->tcfp_t_c, p->tcfp_burst); + if (p->peak_present) { + ptoks = toks + p->tcfp_ptoks; + if (ptoks > p->tcfp_mtu_ptoks) + ptoks = p->tcfp_mtu_ptoks; + ptoks -= (s64)psched_l2t_ns(&p->peak, + qdisc_pkt_len(skb)); } - toks += police->tcfp_toks; - if (toks > police->tcfp_burst) - toks = police->tcfp_burst; - toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb)); + toks += p->tcfp_toks; + if (toks > p->tcfp_burst) + toks = p->tcfp_burst; + toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb)); if ((toks|ptoks) >= 0) { - police->tcfp_t_c = now; - police->tcfp_toks = toks; - police->tcfp_ptoks = ptoks; - if (police->tcfp_result == TC_ACT_SHOT) - police->tcf_qstats.drops++; - spin_unlock(&police->tcf_lock); - return police->tcfp_result; + p->tcfp_t_c = now; + p->tcfp_toks = toks; + p->tcfp_ptoks = ptoks; + ret = p->tcfp_result; + goto inc_drops; } } - police->tcf_qstats.overlimits++; - if (police->tcf_action == TC_ACT_SHOT) - police->tcf_qstats.drops++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; +inc_overlimits: + qstats_overlimit_inc(this_cpu_ptr(police->common.cpu_qstats)); +inc_drops: + if (ret == TC_ACT_SHOT) + 
qstats_drop_inc(this_cpu_ptr(police->common.cpu_qstats)); +end: + return ret; +} + +static void tcf_police_cleanup(struct tc_action *a) +{ + struct tcf_police *police = to_police(a); + struct tcf_police_params *p; + + p = rcu_dereference_protected(police->params, 1); + if (p) + kfree_rcu(p, rcu); } static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, @@ -272,6 +302,7 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = to_police(a); + struct tcf_police_params *p; struct tc_police opt = { .index = police->tcf_index, .refcnt = refcount_read(&police->tcf_refcnt) - ref, @@ -281,19 +312,21 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, spin_lock_bh(&police->tcf_lock); opt.action = police->tcf_action; - opt.mtu = police->tcfp_mtu; - opt.burst = PSCHED_NS2TICKS(police->tcfp_burst); - if (police->rate_present) - psched_ratecfg_getrate(&opt.rate, &police->rate); - if (police->peak_present) - psched_ratecfg_getrate(&opt.peakrate, &police->peak); + p = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); + opt.mtu = p->tcfp_mtu; + opt.burst = PSCHED_NS2TICKS(p->tcfp_burst); + if (p->rate_present) + psched_ratecfg_getrate(&opt.rate, &p->rate); + if (p->peak_present) + psched_ratecfg_getrate(&opt.peakrate, &p->peak); if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; - if (police->tcfp_result && - nla_put_u32(skb, TCA_POLICE_RESULT, police->tcfp_result)) + if (p->tcfp_result && + nla_put_u32(skb, TCA_POLICE_RESULT, p->tcfp_result)) goto nla_put_failure; - if (police->tcfp_ewma_rate && - nla_put_u32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate)) + if (p->tcfp_ewma_rate && + nla_put_u32(skb, TCA_POLICE_AVRATE, p->tcfp_ewma_rate)) goto nla_put_failure; t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install); @@ -312,8 +345,7 @@ nla_put_failure: return -1; } -static int tcf_police_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, police_net_id); @@ -333,6 +365,7 @@ static struct tc_action_ops act_police_ops = { .init = tcf_police_init, .walk = tcf_police_walker, .lookup = tcf_police_search, + .cleanup = tcf_police_cleanup, .size = sizeof(struct tcf_police), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 44e9c00657bc..1a0c682fd734 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -69,7 +69,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, parm->index, est, a, - &act_sample_ops, bind, false); + &act_sample_ops, bind, true); if (ret) { tcf_idr_cleanup(tn, parm->index); return ret; @@ -224,8 +224,7 @@ static int tcf_sample_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, sample_net_id); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 52400d49f81f..902957beceb3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -188,8 +188,7 @@ static int tcf_simp_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, 
type, ops, extack); } -static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, simp_net_id); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 73e44ce2a883..64dba3708fce 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -99,7 +99,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - struct tcf_skbedit_params *params_old, *params_new; + struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tc_skbedit *parm; struct tcf_skbedit *d; @@ -187,8 +187,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, } } - ASSERT_RTNL(); - params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { if (ret == ACT_P_CREATED) @@ -210,11 +208,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (flags & SKBEDIT_F_MASK) params_new->mask = *mask; + spin_lock_bh(&d->tcf_lock); d->tcf_action = parm->action; - params_old = rtnl_dereference(d->params); - rcu_assign_pointer(d->params, params_new); - if (params_old) - kfree_rcu(params_old, rcu); + rcu_swap_protected(d->params, params_new, + lockdep_is_held(&d->tcf_lock)); + spin_unlock_bh(&d->tcf_lock); + if (params_new) + kfree_rcu(params_new, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -231,12 +231,14 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, .index = d->tcf_index, .refcnt = refcount_read(&d->tcf_refcnt) - ref, .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, - .action = d->tcf_action, }; u64 pure_flags = 0; struct tcf_t t; - params = rtnl_dereference(d->params); + spin_lock_bh(&d->tcf_lock); + params = rcu_dereference_protected(d->params, + lockdep_is_held(&d->tcf_lock)); + opt.action = d->tcf_action; if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -264,9 +266,12 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD)) goto nla_put_failure; + spin_unlock_bh(&d->tcf_lock); + return skb->len; nla_put_failure: + spin_unlock_bh(&d->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -291,8 +296,7 @@ static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 588077fafd6c..59710a183bd3 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -251,8 +251,7 @@ static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 681f6f04e7da..4cca8f274662 100644 --- 
a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -548,8 +548,7 @@ static int tunnel_key_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 033d273afe50..ba677d54a7af 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -288,8 +288,7 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index, - struct netlink_ext_ack *extack) +static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, vlan_net_id); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1a67af8a6e8c..f427a1e00e7e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,6 +31,8 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> +extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; + /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); @@ -240,8 +242,8 @@ static void tcf_chain_destroy(struct tcf_chain *chain) if (!chain->index) block->chain0.chain = NULL; kfree(chain); - if (list_empty(&block->chain_list) && block->refcnt == 0) - kfree(block); + if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt)) + kfree_rcu(block, rcu); } static void tcf_chain_hold(struct tcf_chain *chain) @@ -473,6 +475,7 @@ tcf_chain0_head_change_cb_del(struct tcf_block *block, } struct tcf_net { + spinlock_t idr_lock; /* Protects idr */ struct idr idr; }; @@ -482,16 +485,25 @@ static int tcf_block_insert(struct tcf_block *block, struct net *net, struct netlink_ext_ack *extack) { struct tcf_net *tn = net_generic(net, tcf_net_id); + int err; + + idr_preload(GFP_KERNEL); + spin_lock(&tn->idr_lock); + err = idr_alloc_u32(&tn->idr, block, &block->index, block->index, + GFP_NOWAIT); + spin_unlock(&tn->idr_lock); + idr_preload_end(); - return idr_alloc_u32(&tn->idr, block, &block->index, block->index, - GFP_KERNEL); + return err; } static void tcf_block_remove(struct tcf_block *block, struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); + spin_lock(&tn->idr_lock); idr_remove(&tn->idr, block->index); + spin_unlock(&tn->idr_lock); } static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, @@ -510,7 +522,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, INIT_LIST_HEAD(&block->owner_list); INIT_LIST_HEAD(&block->chain0.filter_chain_list); - block->refcnt = 1; + refcount_set(&block->refcnt, 1); block->net = net; block->index = block_index; @@ -527,6 +539,78 @@ static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) return idr_find(&tn->idr, block_index); } +static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) +{ + struct tcf_block *block; + + rcu_read_lock(); + block = tcf_block_lookup(net, block_index); + if (block && !refcount_inc_not_zero(&block->refcnt)) + block = NULL; + rcu_read_unlock(); + + return block; +} + +static void tcf_block_flush_all_chains(struct tcf_block *block) +{ + struct tcf_chain *chain; + + /* Hold a refcnt for all chains, so that they don't disappear + * 
while we are iterating. + */ + list_for_each_entry(chain, &block->chain_list, list) + tcf_chain_hold(chain); + + list_for_each_entry(chain, &block->chain_list, list) + tcf_chain_flush(chain); +} + +static void tcf_block_put_all_chains(struct tcf_block *block) +{ + struct tcf_chain *chain, *tmp; + + /* At this point, all the chains should have refcnt >= 1. */ + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { + tcf_chain_put_explicitly_created(chain); + tcf_chain_put(chain); + } +} + +static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + if (refcount_dec_and_test(&block->refcnt)) { + /* Flushing/putting all chains will cause the block to be + * deallocated when last chain is freed. However, if chain_list + * is empty, block has to be manually deallocated. After block + * reference counter reached 0, it is no longer possible to + * increment it or add new chains to block. + */ + bool free_block = list_empty(&block->chain_list); + + if (tcf_block_shared(block)) + tcf_block_remove(block, block->net); + if (!free_block) + tcf_block_flush_all_chains(block); + + if (q) + tcf_block_offload_unbind(block, q, ei); + + if (free_block) + kfree_rcu(block, rcu); + else + tcf_block_put_all_chains(block); + } else if (q) { + tcf_block_offload_unbind(block, q, ei); + } +} + +static void tcf_block_refcnt_put(struct tcf_block *block) +{ + __tcf_block_put(block, NULL, NULL); +} + /* Find tcf block. * Set q, parent, cl when appropriate. */ @@ -537,9 +621,10 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, struct netlink_ext_ack *extack) { struct tcf_block *block; + int err = 0; if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_lookup(net, block_index); + block = tcf_block_refcnt_get(net, block_index); if (!block) { NL_SET_ERR_MSG(extack, "Block of given index was not found"); return ERR_PTR(-EINVAL); @@ -548,55 +633,106 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, const struct Qdisc_class_ops *cops; struct net_device *dev; + rcu_read_lock(); + /* Find link */ - dev = __dev_get_by_index(net, ifindex); - if (!dev) + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); return ERR_PTR(-ENODEV); + } /* Find qdisc */ if (!*parent) { *q = dev->qdisc; *parent = (*q)->handle; } else { - *q = qdisc_lookup(dev, TC_H_MAJ(*parent)); + *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; } } + *q = qdisc_refcount_inc_nz(*q); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } + /* Is it classful? */ cops = (*q)->ops->cl_ops; if (!cops) { NL_SET_ERR_MSG(extack, "Qdisc not classful"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; } if (!cops->tcf_block) { NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_rcu; } + /* At this point we know that qdisc is not noop_qdisc, + * which means that qdisc holds a reference to net_device + * and we hold a reference to qdisc, so it is safe to release + * rcu read lock. + */ + rcu_read_unlock(); + /* Do we search for filter, attached to class? 
*/ if (TC_H_MIN(*parent)) { *cl = cops->find(*q, *parent); if (*cl == 0) { NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - return ERR_PTR(-ENOENT); + err = -ENOENT; + goto errout_qdisc; } } /* And the last stroke */ block = cops->tcf_block(*q, *cl, extack); - if (!block) - return ERR_PTR(-EINVAL); + if (!block) { + err = -EINVAL; + goto errout_qdisc; + } if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_qdisc; } + + /* Always take reference to block in order to support execution + * of rules update path of cls API without rtnl lock. Caller + * must release block when it is finished using it. 'if' block + * of this conditional obtain reference to block by calling + * tcf_block_refcnt_get(). + */ + refcount_inc(&block->refcnt); } return block; + +errout_rcu: + rcu_read_unlock(); +errout_qdisc: + if (*q) { + qdisc_put(*q); + *q = NULL; + } + return ERR_PTR(err); +} + +static void tcf_block_release(struct Qdisc *q, struct tcf_block *block) +{ + if (!IS_ERR_OR_NULL(block)) + tcf_block_refcnt_put(block); + + if (q) + qdisc_put(q); } struct tcf_block_owner_item { @@ -664,21 +800,16 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, { struct net *net = qdisc_net(q); struct tcf_block *block = NULL; - bool created = false; int err; - if (ei->block_index) { + if (ei->block_index) /* block_index not 0 means the shared block is requested */ - block = tcf_block_lookup(net, ei->block_index); - if (block) - block->refcnt++; - } + block = tcf_block_refcnt_get(net, ei->block_index); if (!block) { block = tcf_block_create(net, q, ei->block_index, extack); if (IS_ERR(block)) return PTR_ERR(block); - created = true; if (tcf_block_shared(block)) { err = tcf_block_insert(block, net, extack); if (err) @@ -708,14 +839,8 @@ err_block_offload_bind: err_chain0_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type); err_block_owner_add: - if (created) { - if (tcf_block_shared(block)) - tcf_block_remove(block, net); err_block_insert: - kfree(block); - } else { - block->refcnt--; - } + tcf_block_refcnt_put(block); return err; } EXPORT_SYMBOL(tcf_block_get_ext); @@ -747,42 +872,12 @@ EXPORT_SYMBOL(tcf_block_get); void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain, *tmp; - if (!block) return; tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); - if (block->refcnt == 1) { - if (tcf_block_shared(block)) - tcf_block_remove(block, block->net); - - /* Hold a refcnt for all chains, so that they don't disappear - * while we are iterating. - */ - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_hold(chain); - - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_flush(chain); - } - - tcf_block_offload_unbind(block, q, ei); - - if (block->refcnt == 1) { - /* At this point, all the chains should have refcnt >= 1. 
*/ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { - tcf_chain_put_explicitly_created(chain); - tcf_chain_put(chain); - } - - block->refcnt--; - if (list_empty(&block->chain_list)) - kfree(block); - } else { - block->refcnt--; - } + __tcf_block_put(block, q, ei); } EXPORT_SYMBOL(tcf_block_put_ext); @@ -1211,7 +1306,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1332,6 +1427,7 @@ replay: errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. */ goto replay; @@ -1360,7 +1456,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1453,6 +1549,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; } @@ -1475,7 +1572,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *fh = NULL; int err; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1538,6 +1635,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; } @@ -1631,12 +1729,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, + cb->extack); if (err) return err; if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_lookup(net, tcm->tcm_block_index); + block = tcf_block_refcnt_get(net, tcm->tcm_block_index); if (!block) goto out; /* If we work with block index, q is NULL and parent value @@ -1695,6 +1794,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) } } + if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) + tcf_block_refcnt_put(block); cb->args[0] = index; out: @@ -1838,7 +1939,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1854,7 +1955,8 @@ replay: chain_index = tca[TCA_CHAIN] ? 
nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); - return -EINVAL; + err = -EINVAL; + goto errout_block; } chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { @@ -1866,23 +1968,27 @@ replay: tcf_chain_hold(chain); } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); - return -EEXIST; + err = -EEXIST; + goto errout_block; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); - return -ENOENT; + err = -ENOENT; + goto errout_block; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); - return -ENOMEM; + err = -ENOMEM; + goto errout_block; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); - return -EINVAL; + err = -EINVAL; + goto errout_block; } tcf_chain_hold(chain); } @@ -1902,6 +2008,8 @@ replay: RTM_NEWCHAIN, false); break; case RTM_DELCHAIN: + tfilter_notify_chain(net, skb, block, q, parent, n, + chain, RTM_DELTFILTER); /* Flush the chain first as the user requested chain removal. */ tcf_chain_flush(chain); /* In case the chain was successfully deleted, put a reference @@ -1924,6 +2032,8 @@ replay: errout: tcf_chain_put(chain); +errout_block: + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. */ goto replay; @@ -1947,12 +2057,13 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + cb->extack); if (err) return err; if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_lookup(net, tcm->tcm_block_index); + block = tcf_block_refcnt_get(net, tcm->tcm_block_index); if (!block) goto out; /* If we work with block index, q is NULL and parent value @@ -2019,6 +2130,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index++; } + if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) + tcf_block_refcnt_put(block); cb->args[0] = index; out: @@ -2211,6 +2324,7 @@ static __net_init int tcf_net_init(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); + spin_lock_init(&tn->idr_lock); idr_init(&tn->idr); return 0; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 6fd9bdd93796..9aada2d0ef06 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -98,7 +98,7 @@ struct cls_fl_filter { struct list_head list; u32 handle; u32 flags; - unsigned int in_hw_count; + u32 in_hw_count; struct rcu_work rwork; struct net_device *hw_dev; }; @@ -993,7 +993,7 @@ static int fl_init_mask_hashtable(struct fl_flow_mask *mask) } #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) -#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member)) +#define FL_KEY_MEMBER_SIZE(member) FIELD_SIZEOF(struct fl_flow_key, member) #define FL_KEY_IS_MASKED(mask, member) \ memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \ @@ -1880,6 +1880,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags)) goto nla_put_failure; + if (nla_put_u32(skb, TCA_FLOWER_IN_HW_COUNT, f->in_hw_count)) + goto nla_put_failure; + if (tcf_exts_dump(skb, 
&f->exts)) goto nla_put_failure; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index f218ccf1e2d9..4b28fd44576d 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -68,7 +68,6 @@ struct tc_u_knode { u32 mask; u32 __percpu *pcpu_success; #endif - struct tcf_proto *tp; struct rcu_work rwork; /* The 'sel' field MUST be the last field in structure to allow for * tc_u32_keys allocated at end of structure. @@ -80,10 +79,10 @@ struct tc_u_hnode { struct tc_u_hnode __rcu *next; u32 handle; u32 prio; - struct tc_u_common *tp_c; int refcnt; unsigned int divisor; struct idr handle_idr; + bool is_root; struct rcu_head rcu; u32 flags; /* The 'ht' field MUST be the last field in structure to allow for @@ -98,7 +97,7 @@ struct tc_u_common { int refcnt; struct idr handle_idr; struct hlist_node hnode; - struct rcu_head rcu; + long knodes; }; static inline unsigned int u32_hash_fold(__be32 key, @@ -344,19 +343,16 @@ static void *tc_u_common_ptr(const struct tcf_proto *tp) return block->q; } -static unsigned int tc_u_hash(const struct tcf_proto *tp) +static struct hlist_head *tc_u_hash(void *key) { - return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT); + return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT); } -static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) +static struct tc_u_common *tc_u_common_find(void *key) { struct tc_u_common *tc; - unsigned int h; - - h = tc_u_hash(tp); - hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { - if (tc->ptr == tc_u_common_ptr(tp)) + hlist_for_each_entry(tc, tc_u_hash(key), hnode) { + if (tc->ptr == key) return tc; } return NULL; @@ -365,10 +361,8 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) static int u32_init(struct tcf_proto *tp) { struct tc_u_hnode *root_ht; - struct tc_u_common *tp_c; - unsigned int h; - - tp_c = tc_u_common_find(tp); + void *key = tc_u_common_ptr(tp); + struct tc_u_common *tp_c = tc_u_common_find(key); root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) @@ -377,6 +371,7 @@ static int u32_init(struct tcf_proto *tp) root_ht->refcnt++; root_ht->handle = tp_c ? 
gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; + root_ht->is_root = true; idr_init(&root_ht->handle_idr); if (tp_c == NULL) { @@ -385,26 +380,24 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } - tp_c->ptr = tc_u_common_ptr(tp); + tp_c->ptr = key; INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); - h = tc_u_hash(tp); - hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); + hlist_add_head(&tp_c->hnode, tc_u_hash(key)); } tp_c->refcnt++; RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); - root_ht->tp_c = tp_c; + root_ht->refcnt++; rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; } -static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, - bool free_pf) +static int u32_destroy_key(struct tc_u_knode *n, bool free_pf) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); @@ -438,7 +431,7 @@ static void u32_delete_key_work(struct work_struct *work) struct tc_u_knode, rwork); rtnl_lock(); - u32_destroy_key(key->tp, key, false); + u32_destroy_key(key, false); rtnl_unlock(); } @@ -455,12 +448,13 @@ static void u32_delete_key_freepf_work(struct work_struct *work) struct tc_u_knode, rwork); rtnl_lock(); - u32_destroy_key(key->tp, key, true); + u32_destroy_key(key, true); rtnl_unlock(); } static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) { + struct tc_u_common *tp_c = tp->data; struct tc_u_knode __rcu **kp; struct tc_u_knode *pkp; struct tc_u_hnode *ht = rtnl_dereference(key->ht_up); @@ -471,6 +465,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) kp = &pkp->next, pkp = rtnl_dereference(*kp)) { if (pkp == key) { RCU_INIT_POINTER(*kp, key->next); + tp_c->knodes--; tcf_unbind_filter(tp, &key->res); idr_remove(&ht->handle_idr, key->handle); @@ -585,6 +580,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct netlink_ext_ack *extack) { + struct tc_u_common *tp_c = tp->data; struct tc_u_knode *n; unsigned int h; @@ -592,13 +588,14 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, while ((n = rtnl_dereference(ht->ht[h])) != NULL) { RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); + tp_c->knodes--; tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n, extack); idr_remove(&ht->handle_idr, n->handle); if (tcf_exts_get_net(&n->exts)) tcf_queue_work(&n->rwork, u32_delete_key_freepf_work); else - u32_destroy_key(n->tp, n, true); + u32_destroy_key(n, true); } } } @@ -610,7 +607,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(ht->refcnt); + WARN_ON(--ht->refcnt); u32_clear_hnode(tp, ht, extack); @@ -631,17 +628,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, return -ENOENT; } -static bool ht_empty(struct tc_u_hnode *ht) -{ - unsigned int h; - - for (h = 0; h <= ht->divisor; h++) - if (rcu_access_pointer(ht->ht[h])) - return false; - - return true; -} - static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; @@ -649,7 +635,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 0) + if (root_ht && --root_ht->refcnt == 1) u32_destroy_hnode(tp, root_ht, extack); if (--tp_c->refcnt == 0) { @@ -679,26 +665,21 @@ static int 
u32_delete(struct tcf_proto *tp, void *arg, bool *last, struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = arg; - struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); struct tc_u_common *tp_c = tp->data; int ret = 0; - if (ht == NULL) - goto out; - if (TC_U32_KEY(ht->handle)) { u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack); ret = u32_delete_key(tp, (struct tc_u_knode *)ht); goto out; } - if (root_ht == ht) { + if (ht->is_root) { NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node"); return -EINVAL; } if (ht->refcnt == 1) { - ht->refcnt--; u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@ -706,38 +687,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, } out: - *last = true; - if (root_ht) { - if (root_ht->refcnt > 1) { - *last = false; - goto ret; - } - if (root_ht->refcnt == 1) { - if (!ht_empty(root_ht)) { - *last = false; - goto ret; - } - } - } - - if (tp_c->refcnt > 1) { - *last = false; - goto ret; - } - - if (tp_c->refcnt == 1) { - struct tc_u_hnode *ht; - - for (ht = rtnl_dereference(tp_c->hlist); - ht; - ht = rtnl_dereference(ht->next)) - if (!ht_empty(ht)) { - *last = false; - break; - } - } - -ret: + *last = tp_c->refcnt == 1 && tp_c->knodes == 0; return ret; } @@ -768,7 +718,7 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { }; static int u32_set_parms(struct net *net, struct tcf_proto *tp, - unsigned long base, struct tc_u_hnode *ht, + unsigned long base, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) @@ -789,12 +739,16 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, } if (handle) { - ht_down = u32_lookup_ht(ht->tp_c, handle); + ht_down = u32_lookup_ht(tp->data, handle); if (!ht_down) { NL_SET_ERR_MSG_MOD(extack, "Link hash table not found"); return -EINVAL; } + if (ht_down->is_root) { + NL_SET_ERR_MSG_MOD(extack, "Not linking to root node"); + return -EINVAL; + } ht_down->refcnt++; } @@ -891,7 +845,6 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, /* Similarly success statistics must be moved as pointers */ new->pcpu_success = n->pcpu_success; #endif - new->tp = tp; memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) { @@ -960,18 +913,17 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (!new) return -ENOMEM; - err = u32_set_parms(net, tp, base, - rtnl_dereference(n->ht_up), new, tb, + err = u32_set_parms(net, tp, base, new, tb, tca[TCA_RATE], ovr, extack); if (err) { - u32_destroy_key(tp, new, false); + u32_destroy_key(new, false); return err; } err = u32_replace_hw_knode(tp, new, flags, extack); if (err) { - u32_destroy_key(tp, new, false); + u32_destroy_key(new, false); return err; } @@ -988,7 +940,11 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_U32_DIVISOR]) { unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); - if (--divisor > 0x100) { + if (!is_power_of_2(divisor)) { + NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2"); + return -EINVAL; + } + if (divisor-- > 0x100) { NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets"); return -EINVAL; } @@ -1013,7 +969,6 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } } - ht->tp_c = tp_c; ht->refcnt = 1; ht->divisor = divisor; ht->handle = handle; @@ -1103,7 +1058,6 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, 
n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; n->flags = flags; - n->tp = tp; err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) @@ -1125,7 +1079,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr, + err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr, extack); if (err == 0) { struct tc_u_knode __rcu **ins; @@ -1146,6 +1100,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); + tp_c->knodes++; *arg = n; return 0; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 98541c6399db..022bca98bde6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -27,7 +27,6 @@ #include <linux/kmod.h> #include <linux/list.h> #include <linux/hrtimer.h> -#include <linux/lockdep.h> #include <linux/slab.h> #include <linux/hashtable.h> @@ -315,6 +314,24 @@ out: return q; } +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) +{ + struct netdev_queue *nq; + struct Qdisc *q; + + if (!handle) + return NULL; + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; + + nq = dev_ingress_queue_rcu(dev); + if (nq) + q = qdisc_match_from_root(nq->qdisc_sleeping, handle); +out: + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; @@ -921,7 +938,7 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb, qdisc_notify(net, skb, n, clid, old, new); if (old) - qdisc_destroy(old); + qdisc_put(old); } /* Graft qdisc "new" to class "classid" of qdisc "parent" or @@ -974,7 +991,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_refcount_inc(new); if (!ingress) - qdisc_destroy(old); + qdisc_put(old); } skip: @@ -1053,10 +1070,6 @@ static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, return 0; } -/* lockdep annotation is needed for ingress; egress gets it only for name */ -static struct lock_class_key qdisc_tx_lock; -static struct lock_class_key qdisc_rx_lock; - /* Allocate and initialize new qdisc. @@ -1121,7 +1134,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev, if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; handle = TC_H_MAKE(TC_H_INGRESS, 0); - lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock); } else { if (handle == 0) { handle = qdisc_alloc_handle(dev); @@ -1129,7 +1141,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev, if (handle == 0) goto err_out3; } - lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); if (!netif_is_multiqueue(dev)) sch->flags |= TCQ_F_ONETXQUEUE; } @@ -1307,6 +1318,18 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) return 0; } +const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { + [TCA_KIND] = { .type = NLA_STRING }, + [TCA_OPTIONS] = { .type = NLA_NESTED }, + [TCA_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct tc_estimator) }, + [TCA_STAB] = { .type = NLA_NESTED }, + [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, + [TCA_CHAIN] = { .type = NLA_U32 }, + [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, + [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, +}; + /* * Delete/get qdisc. 
*/ @@ -1327,7 +1350,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1411,7 +1435,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, replay: /* Reinit, just in case something touches this. */ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1568,7 +1593,7 @@ graft: err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); if (err) { if (q) - qdisc_destroy(q); + qdisc_put(q); return err; } @@ -1645,7 +1670,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, + rtm_tca_policy, cb->extack); if (err < 0) return err; @@ -1864,7 +1890,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -2043,7 +2070,8 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tcm->tcm_parent) { q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); - if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + if (q && q != root && + tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index cd49afca9617..d714d3747bcb 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -150,7 +150,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) pr_debug("atm_tc_put: destroying\n"); list_del_init(&flow->list); pr_debug("atm_tc_put: qdisc %p\n", flow->q); - qdisc_destroy(flow->q); + qdisc_put(flow->q); tcf_block_put(flow->block); if (flow->sock) { pr_debug("atm_tc_put: f_count %ld\n", diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index c07c30b916d5..b910cd5c56f7 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -812,7 +812,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow) if (skb) { flow->head = skb->next; - skb->next = NULL; + skb_mark_not_on_list(skb); } return skb; @@ -1252,7 +1252,7 @@ found: else flow->head = elig_ack->next; - elig_ack->next = NULL; + skb_mark_not_on_list(elig_ack); return elig_ack; } @@ -1675,7 +1675,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, while (segs) { nskb = segs->next; - segs->next = NULL; + skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; cobalt_set_enqueue_time(segs, now); get_cobalt_cb(segs)->adjusted_len = cake_overhead(q, @@ -2644,7 +2644,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, for (i = 1; i <= CAKE_QUEUES; i++) quantum_div[i] = 65535 / i; - q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data), + q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) goto nomem; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index f42025d53cfe..4dc05409e3fb 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1418,7 +1418,7 @@ static void cbq_destroy_class(struct Qdisc 
*sch, struct cbq_class *cl) WARN_ON(cl->filters); tcf_block_put(cl->block); - qdisc_destroy(cl->q); + qdisc_put(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->rate_est); if (cl != &q->link) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index e26a24017faa..e689e11b6d0f 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -379,7 +379,7 @@ static void cbs_destroy(struct Qdisc *sch) cbs_disable_offload(dev, q); if (q->qdisc) - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index e0b0cf8a9939..cdebaed0f8cf 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -134,7 +134,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace estimator"); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); return err; } @@ -153,7 +153,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) { gen_kill_estimator(&cl->rate_est); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 049714c57075..f6f480784bc6 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -412,7 +412,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); tcf_block_put(p->block); - qdisc_destroy(p->q); + qdisc_put(p->q); if (p->mv != p->embedded) kfree(p->mv); } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 24893d3b5d22..3809c9bf8896 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -177,7 +177,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, if (q) { err = fifo_set_limit(q, limit); if (err < 0) { - qdisc_destroy(q); + qdisc_put(q); q = NULL; } } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 4808713c73b9..4b1af706896c 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -92,8 +92,8 @@ struct fq_sched_data { u32 quantum; u32 initial_quantum; u32 flow_refill_delay; - u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ + unsigned long flow_max_rate; /* optional max rate per flow */ u32 orphan_mask; /* mask for orphaned skb */ u32 low_rate_threshold; struct rb_root *fq_root; @@ -106,7 +106,6 @@ struct fq_sched_data { u64 stat_gc_flows; u64 stat_internal_packets; - u64 stat_tcp_retrans; u64 stat_throttled; u64 stat_flows_plimit; u64 stat_pkts_too_long; @@ -319,7 +318,7 @@ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) if (skb) { flow->head = skb->next; - skb->next = NULL; + skb_mark_not_on_list(skb); flow->qlen--; qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; @@ -327,62 +326,17 @@ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) return skb; } -/* We might add in the future detection of retransmits - * For the time being, just return false - */ -static bool skb_is_retransmit(struct sk_buff *skb) -{ - return false; -} - -/* add skb to flow queue - * flow queue is a linked list, kind of FIFO, except for TCP retransmits - * We special case tcp retransmits to be transmitted before other packets. - * We rely on fact that TCP retransmits are unlikely, so we do not waste - * a separate queue or a pointer. 
- * head-> [retrans pkt 1] - * [retrans pkt 2] - * [ normal pkt 1] - * [ normal pkt 2] - * [ normal pkt 3] - * tail-> [ normal pkt 4] - */ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) { - struct sk_buff *prev, *head = flow->head; + struct sk_buff *head = flow->head; skb->next = NULL; - if (!head) { + if (!head) flow->head = skb; - flow->tail = skb; - return; - } - if (likely(!skb_is_retransmit(skb))) { + else flow->tail->next = skb; - flow->tail = skb; - return; - } - /* This skb is a tcp retransmit, - * find the last retrans packet in the queue - */ - prev = NULL; - while (skb_is_retransmit(head)) { - prev = head; - head = head->next; - if (!head) - break; - } - if (!prev) { /* no rtx packet in queue, become the new head */ - skb->next = flow->head; - flow->head = skb; - } else { - if (prev == flow->tail) - flow->tail = skb; - else - skb->next = prev->next; - prev->next = skb; - } + flow->tail = skb; } static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -401,8 +355,6 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } f->qlen++; - if (skb_is_retransmit(skb)) - q->stat_tcp_retrans++; qdisc_qstats_backlog_inc(sch, skb); if (fq_flow_is_detached(f)) { struct sock *sk = skb->sk; @@ -464,7 +416,8 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; - u32 rate, plen; + unsigned long rate; + u32 plen; skb = fq_dequeue_head(sch, &q->internal); if (skb) @@ -491,11 +444,16 @@ begin: } skb = f->head; - if (unlikely(skb && now < f->time_next_packet && - !skb_is_tcp_pure_ack(skb))) { - head->first = f->next; - fq_flow_set_throttled(q, f); - goto begin; + if (skb) { + u64 time_next_packet = max_t(u64, ktime_to_ns(skb->tstamp), + f->time_next_packet); + + if (now < time_next_packet) { + head->first = f->next; + f->time_next_packet = time_next_packet; + fq_flow_set_throttled(q, f); + goto begin; + } } skb = fq_dequeue_head(sch, f); @@ -513,11 +471,7 @@ begin: prefetch(&skb->end); f->credit -= qdisc_pkt_len(skb); - if (!q->rate_enable) - goto out; - - /* Do not pace locally generated ack packets */ - if (skb_is_tcp_pure_ack(skb)) + if (ktime_to_ns(skb->tstamp) || !q->rate_enable) goto out; rate = q->flow_max_rate; @@ -532,11 +486,11 @@ begin: if (f->credit > 0) goto out; } - if (rate != ~0U) { + if (rate != ~0UL) { u64 len = (u64)plen * NSEC_PER_SEC; if (likely(rate)) - do_div(len, rate); + len = div64_ul(len, rate); /* Since socket rate can change later, * clamp the delay to 1 second. * Really, providers of too big packets should be fixed ! @@ -748,9 +702,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, pr_warn_ratelimited("sch_fq: defrate %u ignored.\n", nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE])); - if (tb[TCA_FQ_FLOW_MAX_RATE]) - q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + if (tb[TCA_FQ_FLOW_MAX_RATE]) { + u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + q->flow_max_rate = (rate == ~0U) ? 
~0UL : rate; + } if (tb[TCA_FQ_LOW_RATE_THRESHOLD]) q->low_rate_threshold = nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]); @@ -813,7 +769,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, q->quantum = 2 * psched_mtu(qdisc_dev(sch)); q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); q->flow_refill_delay = msecs_to_jiffies(40); - q->flow_max_rate = ~0U; + q->flow_max_rate = ~0UL; q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; q->new_flows.first = NULL; @@ -823,7 +779,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, q->fq_trees_log = ilog2(1024); q->orphan_mask = 1024 - 1; q->low_rate_threshold = 550000 / 8; - qdisc_watchdog_init(&q->watchdog, sch); + qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC); if (opt) err = fq_change(sch, opt, extack); @@ -849,7 +805,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || - nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || + nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, + min_t(unsigned long, q->flow_max_rate, ~0U)) || nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || @@ -873,7 +830,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.gc_flows = q->stat_gc_flows; st.highprio_packets = q->stat_internal_packets; - st.tcp_retrans = q->stat_tcp_retrans; + st.tcp_retrans = 0; st.throttled = q->stat_throttled; st.flows_plimit = q->stat_flows_plimit; st.pkts_too_long = q->stat_pkts_too_long; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 6c0a9d5dbf94..cd04d40c30b6 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -124,7 +124,7 @@ static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow) struct sk_buff *skb = flow->head; flow->head = skb->next; - skb->next = NULL; + skb_mark_not_on_list(skb); return skb; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 69078c82963e..de1663f7d3ad 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -184,7 +184,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, skb = nskb; (*packets)++; /* GSO counts as one pkt */ } - skb->next = NULL; + skb_mark_not_on_list(skb); } /* This variant of try_bulk_dequeue_skb() makes sure @@ -210,7 +210,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, skb = nskb; } while (++cnt < 8); (*packets) += cnt; - skb->next = NULL; + skb_mark_not_on_list(skb); } /* Note that dequeue_skb can possibly return a SKB list (via skb->next). 
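The conversions from open-coded "skb->next = NULL" to skb_mark_not_on_list() in the hunks above give the list-detach operation a single name, so later changes to skb list handling only need to touch one helper. A minimal sketch of such a helper, assuming it does nothing more than clear the next pointer (the in-tree definition in <linux/skbuff.h> may differ in detail):

	static inline void skb_mark_not_on_list(struct sk_buff *skb)
	{
		/* detach skb from whatever skb list it was linked into */
		skb->next = NULL;
	}
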
@@ -572,6 +572,18 @@ struct Qdisc noop_qdisc = { .dev_queue = &noop_netdev_queue, .running = SEQCNT_ZERO(noop_qdisc.running), .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), + .gso_skb = { + .next = (struct sk_buff *)&noop_qdisc.gso_skb, + .prev = (struct sk_buff *)&noop_qdisc.gso_skb, + .qlen = 0, + .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock), + }, + .skb_bad_txq = { + .next = (struct sk_buff *)&noop_qdisc.skb_bad_txq, + .prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq, + .qlen = 0, + .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock), + }, }; EXPORT_SYMBOL(noop_qdisc); @@ -901,7 +913,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, if (!ops->init || ops->init(sch, NULL, extack) == 0) return sch; - qdisc_destroy(sch); + qdisc_put(sch); return NULL; } EXPORT_SYMBOL(qdisc_create_dflt); @@ -941,15 +953,18 @@ void qdisc_free(struct Qdisc *qdisc) kfree((char *) qdisc - qdisc->padded); } -void qdisc_destroy(struct Qdisc *qdisc) +static void qdisc_free_cb(struct rcu_head *head) +{ + struct Qdisc *q = container_of(head, struct Qdisc, rcu); + + qdisc_free(q); +} + +static void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; struct sk_buff *skb, *tmp; - if (qdisc->flags & TCQ_F_BUILTIN || - !refcount_dec_and_test(&qdisc->refcnt)) - return; - #ifdef CONFIG_NET_SCHED qdisc_hash_del(qdisc); @@ -974,9 +989,34 @@ void qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(skb); } - qdisc_free(qdisc); + call_rcu(&qdisc->rcu, qdisc_free_cb); +} + +void qdisc_put(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN || + !refcount_dec_and_test(&qdisc->refcnt)) + return; + + qdisc_destroy(qdisc); +} +EXPORT_SYMBOL(qdisc_put); + +/* Version of qdisc_put() that is called with rtnl mutex unlocked. + * Intended to be used as optimization, this function only takes rtnl lock if + * qdisc reference counter reached zero. + */ + +void qdisc_put_unlocked(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN || + !refcount_dec_and_rtnl_lock(&qdisc->refcnt)) + return; + + qdisc_destroy(qdisc); + rtnl_unlock(); } -EXPORT_SYMBOL(qdisc_destroy); +EXPORT_SYMBOL(qdisc_put_unlocked); /* Attach toplevel qdisc to device queue. 
*/ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, @@ -1245,8 +1285,6 @@ static void dev_init_scheduler_queue(struct net_device *dev, rcu_assign_pointer(dev_queue->qdisc, qdisc); dev_queue->qdisc_sleeping = qdisc; - __skb_queue_head_init(&qdisc->gso_skb); - __skb_queue_head_init(&qdisc->skb_bad_txq); } void dev_init_scheduler(struct net_device *dev) @@ -1270,7 +1308,7 @@ static void shutdown_scheduler_queue(struct net_device *dev, rcu_assign_pointer(dev_queue->qdisc, qdisc_default); dev_queue->qdisc_sleeping = qdisc_default; - qdisc_destroy(qdisc); + qdisc_put(qdisc); } } @@ -1279,7 +1317,7 @@ void dev_shutdown(struct net_device *dev) netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); - qdisc_destroy(dev->qdisc); + qdisc_put(dev->qdisc); dev->qdisc = &noop_qdisc; WARN_ON(timer_pending(&dev->watchdog_timer)); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3278a76f6861..b18ec1f6de60 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1092,7 +1092,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) struct hfsc_sched *q = qdisc_priv(sch); tcf_block_put(cl->block); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); gen_kill_estimator(&cl->rate_est); if (cl != &q->root) kfree(cl); diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index c3a8388dcdf6..9d6a47697406 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -330,7 +330,7 @@ static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket) struct sk_buff *skb = bucket->head; bucket->head = skb->next; - skb->next = NULL; + skb_mark_not_on_list(skb); return skb; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 43c4bfe625a9..58b449490757 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -132,7 +132,7 @@ struct htb_class { struct htb_class_inner { struct htb_prio clprio[TC_HTB_NUMPRIO]; } inner; - } un; + }; s64 pq_key; int prio_activity; /* for which prios are we active */ @@ -411,13 +411,13 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.clprio[prio].feed.rb_node) + if (p->inner.clprio[prio].feed.rb_node) /* parent already has its feed in use so that * reset bit in mask as parent is already ok */ mask &= ~(1 << prio); - htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio); + htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio); } p->prio_activity |= mask; cl = p; @@ -447,19 +447,19 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.clprio[prio].ptr == cl->node + prio) { + if (p->inner.clprio[prio].ptr == cl->node + prio) { /* we are removing child which is pointed to from * parent feed - forget the pointer but remember * classid */ - p->un.inner.clprio[prio].last_ptr_id = cl->common.classid; - p->un.inner.clprio[prio].ptr = NULL; + p->inner.clprio[prio].last_ptr_id = cl->common.classid; + p->inner.clprio[prio].ptr = NULL; } htb_safe_rb_erase(cl->node + prio, - &p->un.inner.clprio[prio].feed); + &p->inner.clprio[prio].feed); - if (!p->un.inner.clprio[prio].feed.rb_node) + if (!p->inner.clprio[prio].feed.rb_node) mask |= 1 << prio; } @@ -555,7 +555,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) */ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(cl->level || !cl->un.leaf.q || 
!cl->un.leaf.q->q.qlen); + WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen); if (!cl->prio_activity) { cl->prio_activity = 1 << cl->prio; @@ -577,22 +577,6 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) cl->prio_activity = 0; } -static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, - struct qdisc_skb_head *qh) -{ - struct sk_buff *last = qh->tail; - - if (last) { - skb->next = NULL; - last->next = skb; - qh->tail = skb; - } else { - qh->tail = skb; - qh->head = skb; - } - qh->qlen++; -} - static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -603,7 +587,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl == HTB_DIRECT) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen) { - htb_enqueue_tail(skb, sch, &q->direct_queue); + __qdisc_enqueue_tail(skb, &q->direct_queue); q->direct_pkts++; } else { return qdisc_drop(skb, sch, to_free); @@ -615,7 +599,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, __qdisc_drop(skb, to_free); return ret; #endif - } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q, + } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, to_free)) != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) { qdisc_qstats_drop(sch); @@ -823,7 +807,7 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio) cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); if (!cl->level) return cl; - clp = &cl->un.inner.clprio[prio]; + clp = &cl->inner.clprio[prio]; (++sp)->root = clp->feed.rb_node; sp->pptr = &clp->ptr; sp->pid = &clp->last_ptr_id; @@ -857,7 +841,7 @@ next: * graft operation on the leaf since last dequeue; * simply deactivate and skip such class */ - if (unlikely(cl->un.leaf.q->q.qlen == 0)) { + if (unlikely(cl->leaf.q->q.qlen == 0)) { struct htb_class *next; htb_deactivate(q, cl); @@ -873,12 +857,12 @@ next: goto next; } - skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); + skb = cl->leaf.q->dequeue(cl->leaf.q); if (likely(skb != NULL)) break; - qdisc_warn_nonwc("htb", cl->un.leaf.q); - htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr: + qdisc_warn_nonwc("htb", cl->leaf.q); + htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr: &q->hlevel[0].hprio[prio].ptr); cl = htb_lookup_leaf(hprio, prio); @@ -886,16 +870,16 @@ next: if (likely(skb != NULL)) { bstats_update(&cl->bstats, skb); - cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); - if (cl->un.leaf.deficit[level] < 0) { - cl->un.leaf.deficit[level] += cl->quantum; - htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr : + cl->leaf.deficit[level] -= qdisc_pkt_len(skb); + if (cl->leaf.deficit[level] < 0) { + cl->leaf.deficit[level] += cl->quantum; + htb_next_rb_node(level ? 
&cl->parent->inner.clprio[prio].ptr : &q->hlevel[0].hprio[prio].ptr); } /* this used to be after charge_class but this constelation * gives us slightly better performance */ - if (!cl->un.leaf.q->q.qlen) + if (!cl->leaf.q->q.qlen) htb_deactivate(q, cl); htb_charge_class(q, cl, level, skb); } @@ -972,10 +956,10 @@ static void htb_reset(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->level) - memset(&cl->un.inner, 0, sizeof(cl->un.inner)); + memset(&cl->inner, 0, sizeof(cl->inner)); else { - if (cl->un.leaf.q) - qdisc_reset(cl->un.leaf.q); + if (cl->leaf.q) + qdisc_reset(cl->leaf.q); } cl->prio_activity = 0; cl->cmode = HTB_CAN_SEND; @@ -1098,8 +1082,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, */ tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; tcm->tcm_handle = cl->common.classid; - if (!cl->level && cl->un.leaf.q) - tcm->tcm_info = cl->un.leaf.q->handle; + if (!cl->level && cl->leaf.q) + tcm->tcm_info = cl->leaf.q->handle; nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) @@ -1142,9 +1126,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) }; __u32 qlen = 0; - if (!cl->level && cl->un.leaf.q) { - qlen = cl->un.leaf.q->q.qlen; - qs.backlog = cl->un.leaf.q->qstats.backlog; + if (!cl->level && cl->leaf.q) { + qlen = cl->leaf.q->q.qlen; + qs.backlog = cl->leaf.q->qstats.backlog; } cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), INT_MIN, INT_MAX); @@ -1172,14 +1156,14 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, cl->common.classid, extack)) == NULL) return -ENOBUFS; - *old = qdisc_replace(sch, new, &cl->un.leaf.q); + *old = qdisc_replace(sch, new, &cl->leaf.q); return 0; } static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - return !cl->level ? cl->un.leaf.q : NULL; + return !cl->level ? cl->leaf.q : NULL; } static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) @@ -1205,15 +1189,15 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, { struct htb_class *parent = cl->parent; - WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity); + WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity); if (parent->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&parent->pq_node, &q->hlevel[parent->level].wait_pq); parent->level = 0; - memset(&parent->un.inner, 0, sizeof(parent->un.inner)); - parent->un.leaf.q = new_q ? new_q : &noop_qdisc; + memset(&parent->inner, 0, sizeof(parent->inner)); + parent->leaf.q = new_q ? 
new_q : &noop_qdisc; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; parent->t_c = ktime_get_ns(); @@ -1223,8 +1207,8 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { - WARN_ON(!cl->un.leaf.q); - qdisc_destroy(cl->un.leaf.q); + WARN_ON(!cl->leaf.q); + qdisc_put(cl->leaf.q); } gen_kill_estimator(&cl->rate_est); tcf_block_put(cl->block); @@ -1286,11 +1270,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); if (!cl->level) { - unsigned int qlen = cl->un.leaf.q->q.qlen; - unsigned int backlog = cl->un.leaf.q->qstats.backlog; + unsigned int qlen = cl->leaf.q->q.qlen; + unsigned int backlog = cl->leaf.q->qstats.backlog; - qdisc_reset(cl->un.leaf.q); - qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog); + qdisc_reset(cl->leaf.q); + qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog); } /* delete from hash and active; remainder in destroy_class */ @@ -1419,13 +1403,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, classid, NULL); sch_tree_lock(sch); if (parent && !parent->level) { - unsigned int qlen = parent->un.leaf.q->q.qlen; - unsigned int backlog = parent->un.leaf.q->qstats.backlog; + unsigned int qlen = parent->leaf.q->q.qlen; + unsigned int backlog = parent->leaf.q->qstats.backlog; /* turn parent into inner node */ - qdisc_reset(parent->un.leaf.q); - qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog); - qdisc_destroy(parent->un.leaf.q); + qdisc_reset(parent->leaf.q); + qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog); + qdisc_put(parent->leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); @@ -1436,10 +1420,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } parent->level = (parent->parent ? parent->parent->level : TC_HTB_MAXDEPTH) - 1; - memset(&parent->un.inner, 0, sizeof(parent->un.inner)); + memset(&parent->inner, 0, sizeof(parent->inner)); } /* leaf (we) needs elementary qdisc */ - cl->un.leaf.q = new_q ? new_q : &noop_qdisc; + cl->leaf.q = new_q ? new_q : &noop_qdisc; cl->common.classid = classid; cl->parent = parent; @@ -1455,8 +1439,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_class_hash_insert(&q->clhash, &cl->common); if (parent) parent->children++; - if (cl->un.leaf.q != &noop_qdisc) - qdisc_hash_add(cl->un.leaf.q, true); + if (cl->leaf.q != &noop_qdisc) + qdisc_hash_add(cl->leaf.q, true); } else { if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, @@ -1478,7 +1462,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); /* it used to be a nasty bug here, we have to check that node - * is really leaf before changing cl->un.leaf ! + * is really leaf before changing cl->leaf ! 
*/ if (!cl->level) { u64 quantum = cl->rate.rate_bytes_ps; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index d6b8ae4ed7a3..f20f3a0f8424 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -65,7 +65,7 @@ static void mq_destroy(struct Qdisc *sch) if (!priv->qdiscs) return; for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); } @@ -119,7 +119,7 @@ static void mq_attach(struct Qdisc *sch) qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) - qdisc_destroy(old); + qdisc_put(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 0e9d761cdd80..d364e63c396d 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -40,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch) for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); } @@ -300,7 +300,7 @@ static void mqprio_attach(struct Qdisc *sch) qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) - qdisc_destroy(old); + qdisc_put(old); if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 1da7ea8de0ad..7410ce4d0321 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -175,7 +175,7 @@ multiq_destroy(struct Qdisc *sch) tcf_block_put(q->block); for (band = 0; band < q->bands; band++) - qdisc_destroy(q->queues[band]); + qdisc_put(q->queues[band]); kfree(q->queues); } @@ -204,7 +204,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, q->queues[i] = &noop_qdisc; qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); + qdisc_put(child); } } @@ -228,7 +228,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); - qdisc_destroy(old); + qdisc_put(old); } sch_tree_unlock(sch); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ad18a2052416..57b3ad9394ad 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -412,16 +412,6 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, return segs; } -static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb) -{ - skb->next = qh->head; - - if (!qh->head) - qh->tail = skb; - qh->head = skb; - qh->qlen++; -} - /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. 
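The netem_enqueue_skb_head() helper removed above is replaced in the following hunk by the generic __qdisc_enqueue_head(). Assuming the generic helper mirrors the removed one, it would look roughly like the sketch below (not necessarily the verbatim definition from <net/sch_generic.h>):

	static inline void __qdisc_enqueue_head(struct sk_buff *skb,
						struct qdisc_skb_head *qh)
	{
		/* link skb in at the head; an empty queue also needs tail set */
		skb->next = qh->head;
		if (!qh->head)
			qh->tail = skb;
		qh->head = skb;
		qh->qlen++;
	}
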
@@ -570,7 +560,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, cb->time_to_send = ktime_get_ns(); q->counter = 0; - netem_enqueue_skb_head(&sch->q, skb); + __qdisc_enqueue_head(skb, &sch->q); sch->qstats.requeues++; } @@ -578,7 +568,7 @@ finish_segs: if (segs) { while (segs) { skb2 = segs->next; - segs->next = NULL; + skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; last_len = segs->len; rc = qdisc_enqueue(segs, sch, to_free); @@ -1032,7 +1022,7 @@ static void netem_destroy(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); if (q->qdisc) - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); dist_free(q->delay_dist); dist_free(q->slot_dist); } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 18d30bb86881..d1429371592f 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -110,8 +110,8 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size) /* If current delay is less than half of target, and * if drop prob is low already, disable early_drop */ - if ((q->vars.qdelay < q->params.target / 2) - && (q->vars.prob < MAX_PROB / 5)) + if ((q->vars.qdelay < q->params.target / 2) && + (q->vars.prob < MAX_PROB / 5)) return false; /* If we have fewer than 2 mtu-sized packets, disable drop_early, @@ -209,7 +209,8 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, /* tupdate is in jiffies */ if (tb[TCA_PIE_TUPDATE]) - q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])); + q->params.tupdate = + usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])); if (tb[TCA_PIE_LIMIT]) { u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]); @@ -247,7 +248,6 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) { - struct pie_sched_data *q = qdisc_priv(sch); int qlen = sch->qstats.backlog; /* current queue size in bytes */ @@ -294,9 +294,9 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) * dq_count to 0 to re-enter the if block when the next * packet is dequeued */ - if (qlen < QUEUE_THRESHOLD) + if (qlen < QUEUE_THRESHOLD) { q->vars.dq_count = DQCOUNT_INVALID; - else { + } else { q->vars.dq_count = 0; q->vars.dq_tstamp = psched_get_time(); } @@ -370,7 +370,7 @@ static void calculate_probability(struct Qdisc *sch) oldprob = q->vars.prob; /* to ensure we increase probability in steps of no more than 2% */ - if (delta > (s32) (MAX_PROB / (100 / 2)) && + if (delta > (s32)(MAX_PROB / (100 / 2)) && q->vars.prob >= MAX_PROB / 10) delta = (MAX_PROB / 100) * 2; @@ -405,7 +405,7 @@ static void calculate_probability(struct Qdisc *sch) * delay is 0 for 2 consecutive Tupdate periods. 
*/ - if ((qdelay == 0) && (qdelay_old == 0) && update_prob) + if (qdelay == 0 && qdelay_old == 0 && update_prob) q->vars.prob = (q->vars.prob * 98) / 100; q->vars.qdelay = qdelay; @@ -419,8 +419,8 @@ static void calculate_probability(struct Qdisc *sch) */ if ((q->vars.qdelay < q->params.target / 2) && (q->vars.qdelay_old < q->params.target / 2) && - (q->vars.prob == 0) && - (q->vars.avg_dq_rate > 0)) + q->vars.prob == 0 && + q->vars.avg_dq_rate > 0) pie_vars_init(&q->vars); } @@ -437,7 +437,6 @@ static void pie_timer(struct timer_list *t) if (q->params.tupdate) mod_timer(&q->adapt_timer, jiffies + q->params.tupdate); spin_unlock(root_lock); - } static int pie_init(struct Qdisc *sch, struct nlattr *opt, @@ -469,15 +468,16 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *opts; opts = nla_nest_start(skb, TCA_OPTIONS); - if (opts == NULL) + if (!opts) goto nla_put_failure; /* convert target from pschedtime to us */ if (nla_put_u32(skb, TCA_PIE_TARGET, - ((u32) PSCHED_TICKS2NS(q->params.target)) / + ((u32)PSCHED_TICKS2NS(q->params.target)) / NSEC_PER_USEC) || nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) || - nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) || + nla_put_u32(skb, TCA_PIE_TUPDATE, + jiffies_to_usecs(q->params.tupdate)) || nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) || nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) || nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) || @@ -489,7 +489,6 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_failure: nla_nest_cancel(skb, opts); return -1; - } static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) @@ -497,7 +496,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) struct pie_sched_data *q = qdisc_priv(sch); struct tc_pie_xstats st = { .prob = q->vars.prob, - .delay = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) / + .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) / NSEC_PER_USEC, /* unscale and return dq_rate in bytes per sec */ .avg_dq_rate = q->vars.avg_dq_rate * @@ -514,8 +513,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) { - struct sk_buff *skb; - skb = qdisc_dequeue_head(sch); + struct sk_buff *skb = qdisc_dequeue_head(sch); if (!skb) return NULL; @@ -527,6 +525,7 @@ static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) static void pie_reset(struct Qdisc *sch) { struct pie_sched_data *q = qdisc_priv(sch); + qdisc_reset_queue(sch); pie_vars_init(&q->vars); } @@ -534,6 +533,7 @@ static void pie_reset(struct Qdisc *sch) static void pie_destroy(struct Qdisc *sch) { struct pie_sched_data *q = qdisc_priv(sch); + q->params.tupdate = 0; del_timer_sync(&q->adapt_timer); } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 222e53d3d27a..f8af98621179 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -175,7 +175,7 @@ prio_destroy(struct Qdisc *sch) tcf_block_put(q->block); prio_offload(sch, NULL); for (prio = 0; prio < q->bands; prio++) - qdisc_destroy(q->queues[prio]); + qdisc_put(q->queues[prio]); } static int prio_tune(struct Qdisc *sch, struct nlattr *opt, @@ -205,7 +205,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, extack); if (!queues[i]) { while (i > oldbands) - qdisc_destroy(queues[--i]); + qdisc_put(queues[--i]); return -ENOMEM; } } @@ -220,7 +220,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); + 
qdisc_put(child); } for (i = oldbands; i < q->bands; i++) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bb1a9c11fc54..dc37c4ead439 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -526,7 +526,7 @@ set_change_agg: return 0; destroy_class: - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); return err; } @@ -537,7 +537,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) qfq_rm_from_agg(q, cl); gen_kill_estimator(&cl->rate_est); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 56c181c3feeb..3ce6c0a2c493 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -181,7 +181,7 @@ static void red_destroy(struct Qdisc *sch) del_timer_sync(&q->adapt_timer); red_offload(sch, false); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { @@ -233,7 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (child) { qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 7cbdad8419b7..bab506b01a32 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -469,7 +469,7 @@ static void sfb_destroy(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { @@ -523,7 +523,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c new file mode 100644 index 000000000000..206e4dbed12f --- /dev/null +++ b/net/sched/sch_taprio.c @@ -0,0 +1,962 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* net/sched/sch_taprio.c Time Aware Priority Scheduler + * + * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> + * + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/list.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> +#include <net/sch_generic.h> + +#define TAPRIO_ALL_GATES_OPEN -1 + +struct sched_entry { + struct list_head list; + + /* The instant that this entry "closes" and the next one + * should open, the qdisc will make some effort so that no + * packet leaves after this time. 
+ */ + ktime_t close_time; + atomic_t budget; + int index; + u32 gate_mask; + u32 interval; + u8 command; +}; + +struct taprio_sched { + struct Qdisc **qdiscs; + struct Qdisc *root; + s64 base_time; + int clockid; + int picos_per_byte; /* Using picoseconds because for 10Gbps+ + * speeds it's sub-nanoseconds per byte + */ + size_t num_entries; + + /* Protects the update side of the RCU protected current_entry */ + spinlock_t current_entry_lock; + struct sched_entry __rcu *current_entry; + struct list_head entries; + ktime_t (*get_time)(void); + struct hrtimer advance_timer; +}; + +static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct Qdisc *child; + int queue; + + queue = skb_get_queue_mapping(skb); + + child = q->qdiscs[queue]; + if (unlikely(!child)) + return qdisc_drop(skb, sch, to_free); + + qdisc_qstats_backlog_inc(sch, skb); + sch->q.qlen++; + + return qdisc_enqueue(skb, child, to_free); +} + +static struct sk_buff *taprio_peek(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sched_entry *entry; + struct sk_buff *skb; + u32 gate_mask; + int i; + + rcu_read_lock(); + entry = rcu_dereference(q->current_entry); + gate_mask = entry ? entry->gate_mask : -1; + rcu_read_unlock(); + + if (!gate_mask) + return NULL; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + int prio; + u8 tc; + + if (unlikely(!child)) + continue; + + skb = child->ops->peek(child); + if (!skb) + continue; + + prio = skb->priority; + tc = netdev_get_prio_tc_map(dev, prio); + + if (!(gate_mask & BIT(tc))) + return NULL; + + return skb; + } + + return NULL; +} + +static inline int length_to_duration(struct taprio_sched *q, int len) +{ + return (len * q->picos_per_byte) / 1000; +} + +static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sched_entry *entry; + struct sk_buff *skb; + u32 gate_mask; + int i; + + rcu_read_lock(); + entry = rcu_dereference(q->current_entry); + /* if there's no entry, it means that the schedule didn't + * start yet, so force all gates to be open, this is in + * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5 + * "AdminGateSates" + */ + gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; + rcu_read_unlock(); + + if (!gate_mask) + return NULL; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + ktime_t guard; + int prio; + int len; + u8 tc; + + if (unlikely(!child)) + continue; + + skb = child->ops->peek(child); + if (!skb) + continue; + + prio = skb->priority; + tc = netdev_get_prio_tc_map(dev, prio); + + if (!(gate_mask & BIT(tc))) + continue; + + len = qdisc_pkt_len(skb); + guard = ktime_add_ns(q->get_time(), + length_to_duration(q, len)); + + /* In the case that there's no gate entry, there's no + * guard band ... + */ + if (gate_mask != TAPRIO_ALL_GATES_OPEN && + ktime_after(guard, entry->close_time)) + return NULL; + + /* ... and no budget. 
*/ + if (gate_mask != TAPRIO_ALL_GATES_OPEN && + atomic_sub_return(len, &entry->budget) < 0) + return NULL; + + skb = child->ops->dequeue(child); + if (unlikely(!skb)) + return NULL; + + qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + + return skb; + } + + return NULL; +} + +static bool should_restart_cycle(const struct taprio_sched *q, + const struct sched_entry *entry) +{ + WARN_ON(!entry); + + return list_is_last(&entry->list, &q->entries); +} + +static enum hrtimer_restart advance_sched(struct hrtimer *timer) +{ + struct taprio_sched *q = container_of(timer, struct taprio_sched, + advance_timer); + struct sched_entry *entry, *next; + struct Qdisc *sch = q->root; + ktime_t close_time; + + spin_lock(&q->current_entry_lock); + entry = rcu_dereference_protected(q->current_entry, + lockdep_is_held(&q->current_entry_lock)); + + /* This is the case that it's the first time that the schedule + * runs, so it only happens once per schedule. The first entry + * is pre-calculated during the schedule initialization. + */ + if (unlikely(!entry)) { + next = list_first_entry(&q->entries, struct sched_entry, + list); + close_time = next->close_time; + goto first_run; + } + + if (should_restart_cycle(q, entry)) + next = list_first_entry(&q->entries, struct sched_entry, + list); + else + next = list_next_entry(entry, list); + + close_time = ktime_add_ns(entry->close_time, next->interval); + + next->close_time = close_time; + atomic_set(&next->budget, + (next->interval * 1000) / q->picos_per_byte); + +first_run: + rcu_assign_pointer(q->current_entry, next); + spin_unlock(&q->current_entry_lock); + + hrtimer_set_expires(&q->advance_timer, close_time); + + rcu_read_lock(); + __netif_schedule(sch); + rcu_read_unlock(); + + return HRTIMER_RESTART; +} + +static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { + [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 }, + [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 }, + [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, +}; + +static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { + [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { + [TCA_TAPRIO_ATTR_PRIOMAP] = { + .len = sizeof(struct tc_mqprio_qopt) + }, + [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, +}; + +static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, + struct netlink_ext_ack *extack) +{ + u32 interval = 0; + + if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) + entry->command = nla_get_u8( + tb[TCA_TAPRIO_SCHED_ENTRY_CMD]); + + if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]) + entry->gate_mask = nla_get_u32( + tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]); + + if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]) + interval = nla_get_u32( + tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); + + if (interval == 0) { + NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); + return -EINVAL; + } + + entry->interval = interval; + + return 0; +} + +static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, + int index, struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; + int err; + + err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, + 
entry_policy, NULL); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Could not parse nested entry"); + return -EINVAL; + } + + entry->index = index; + + return fill_sched_entry(tb, entry, extack); +} + +/* Returns the number of entries in case of success */ +static int parse_sched_single_entry(struct nlattr *n, + struct taprio_sched *q, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; + struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { }; + struct sched_entry *entry; + bool found = false; + u32 index; + int err; + + err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX, + n, entry_list_policy, NULL); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Could not parse nested entry"); + return -EINVAL; + } + + if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) { + NL_SET_ERR_MSG(extack, "Single-entry must include an entry"); + return -EINVAL; + } + + err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX, + tb_list[TCA_TAPRIO_SCHED_ENTRY], + entry_policy, NULL); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Could not parse nested entry"); + return -EINVAL; + } + + if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) { + NL_SET_ERR_MSG(extack, "Entry must specify an index\n"); + return -EINVAL; + } + + index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]); + if (index >= q->num_entries) { + NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule"); + return -EINVAL; + } + + list_for_each_entry(entry, &q->entries, list) { + if (entry->index == index) { + found = true; + break; + } + } + + if (!found) { + NL_SET_ERR_MSG(extack, "Could not find entry"); + return -ENOENT; + } + + err = fill_sched_entry(tb_entry, entry, extack); + if (err < 0) + return err; + + return q->num_entries; +} + +static int parse_sched_list(struct nlattr *list, + struct taprio_sched *q, + struct netlink_ext_ack *extack) +{ + struct nlattr *n; + int err, rem; + int i = 0; + + if (!list) + return -EINVAL; + + nla_for_each_nested(n, list, rem) { + struct sched_entry *entry; + + if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) { + NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'"); + continue; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + NL_SET_ERR_MSG(extack, "Not enough memory for entry"); + return -ENOMEM; + } + + err = parse_sched_entry(n, entry, i, extack); + if (err < 0) { + kfree(entry); + return err; + } + + list_add_tail(&entry->list, &q->entries); + i++; + } + + q->num_entries = i; + + return i; +} + +/* Returns the number of entries in case of success */ +static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q, + struct netlink_ext_ack *extack) +{ + int err = 0; + int clockid; + + if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] && + tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) + return -EINVAL; + + if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0) + return -EINVAL; + + if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) + return -EINVAL; + + if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) + q->base_time = nla_get_s64( + tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); + + if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + + /* We only support static clockids and we don't allow + * for it to be modified after the first init. 
+ */ + if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid)) + return -EINVAL; + + q->clockid = clockid; + } + + if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) + err = parse_sched_list( + tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack); + else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) + err = parse_sched_single_entry( + tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack); + + /* parse_sched_* return the number of entries in the schedule, + * a schedule with zero entries is an error. + */ + if (err == 0) { + NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry"); + return -EINVAL; + } + + return err; +} + +static int taprio_parse_mqprio_opt(struct net_device *dev, + struct tc_mqprio_qopt *qopt, + struct netlink_ext_ack *extack) +{ + int i, j; + + if (!qopt) { + NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); + return -EINVAL; + } + + /* Verify num_tc is not out of max range */ + if (qopt->num_tc > TC_MAX_QUEUE) { + NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range"); + return -EINVAL; + } + + /* taprio imposes that traffic classes map 1:n to tx queues */ + if (qopt->num_tc > dev->num_tx_queues) { + NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues"); + return -EINVAL; + } + + /* Verify priority mapping uses valid tcs */ + for (i = 0; i < TC_BITMASK + 1; i++) { + if (qopt->prio_tc_map[i] >= qopt->num_tc) { + NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping"); + return -EINVAL; + } + } + + for (i = 0; i < qopt->num_tc; i++) { + unsigned int last = qopt->offset[i] + qopt->count[i]; + + /* Verify the queue count is in tx range being equal to the + * real_num_tx_queues indicates the last queue is in use. + */ + if (qopt->offset[i] >= dev->num_tx_queues || + !qopt->count[i] || + last > dev->real_num_tx_queues) { + NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping"); + return -EINVAL; + } + + /* Verify that the offset and counts do not overlap */ + for (j = i + 1; j < qopt->num_tc; j++) { + if (last > qopt->offset[j]) { + NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping"); + return -EINVAL; + } + } + } + + return 0; +} + +static ktime_t taprio_get_start_time(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct sched_entry *entry; + ktime_t now, base, cycle; + s64 n; + + base = ns_to_ktime(q->base_time); + cycle = 0; + + /* Calculate the cycle_time, by summing all the intervals. + */ + list_for_each_entry(entry, &q->entries, list) + cycle = ktime_add_ns(cycle, entry->interval); + + if (!cycle) + return base; + + now = q->get_time(); + + if (ktime_after(base, now)) + return base; + + /* Schedule the start time for the beginning of the next + * cycle. 
+ */ + n = div64_s64(ktime_sub_ns(now, base), cycle); + + return ktime_add_ns(base, (n + 1) * cycle); +} + +static void taprio_start_sched(struct Qdisc *sch, ktime_t start) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct sched_entry *first; + unsigned long flags; + + spin_lock_irqsave(&q->current_entry_lock, flags); + + first = list_first_entry(&q->entries, struct sched_entry, + list); + + first->close_time = ktime_add_ns(start, first->interval); + atomic_set(&first->budget, + (first->interval * 1000) / q->picos_per_byte); + rcu_assign_pointer(q->current_entry, NULL); + + spin_unlock_irqrestore(&q->current_entry_lock, flags); + + hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS); +} + +static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { }; + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_mqprio_qopt *mqprio = NULL; + struct ethtool_link_ksettings ecmd; + int i, err, size; + s64 link_speed; + ktime_t start; + + err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt, + taprio_policy, extack); + if (err < 0) + return err; + + err = -EINVAL; + if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) + mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); + + err = taprio_parse_mqprio_opt(dev, mqprio, extack); + if (err < 0) + return err; + + /* A schedule with less than one entry is an error */ + size = parse_taprio_opt(tb, q, extack); + if (size < 0) + return size; + + hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); + q->advance_timer.function = advance_sched; + + switch (q->clockid) { + case CLOCK_REALTIME: + q->get_time = ktime_get_real; + break; + case CLOCK_MONOTONIC: + q->get_time = ktime_get; + break; + case CLOCK_BOOTTIME: + q->get_time = ktime_get_boottime; + break; + case CLOCK_TAI: + q->get_time = ktime_get_clocktai; + break; + default: + return -ENOTSUPP; + } + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + struct Qdisc *qdisc; + + dev_queue = netdev_get_tx_queue(dev, i); + qdisc = qdisc_create_dflt(dev_queue, + &pfifo_qdisc_ops, + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(i + 1)), + extack); + if (!qdisc) + return -ENOMEM; + + if (i < dev->real_num_tx_queues) + qdisc_hash_add(qdisc, false); + + q->qdiscs[i] = qdisc; + } + + if (mqprio) { + netdev_set_num_tc(dev, mqprio->num_tc); + for (i = 0; i < mqprio->num_tc; i++) + netdev_set_tc_queue(dev, i, + mqprio->count[i], + mqprio->offset[i]); + + /* Always use supplied priority mappings */ + for (i = 0; i < TC_BITMASK + 1; i++) + netdev_set_prio_tc_map(dev, i, + mqprio->prio_tc_map[i]); + } + + if (!__ethtool_get_link_ksettings(dev, &ecmd)) + link_speed = ecmd.base.speed; + else + link_speed = SPEED_1000; + + q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, + link_speed * 1000 * 1000); + + start = taprio_get_start_time(sch); + if (!start) + return 0; + + taprio_start_sched(sch, start); + + return 0; +} + +static void taprio_destroy(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sched_entry *entry, *n; + unsigned int i; + + hrtimer_cancel(&q->advance_timer); + + if (q->qdiscs) { + for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) + qdisc_put(q->qdiscs[i]); + + kfree(q->qdiscs); + } + q->qdiscs = NULL; + + netdev_set_num_tc(dev, 0); + + list_for_each_entry_safe(entry, n, &q->entries, list) { + list_del(&entry->list); + kfree(entry); + } +} + +static int taprio_init(struct 
+
+static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
+		       struct netlink_ext_ack *extack)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+
+	INIT_LIST_HEAD(&q->entries);
+	spin_lock_init(&q->current_entry_lock);
+
+	/* We may overwrite the configuration later */
+	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
+
+	q->root = sch;
+
+	/* We only support static clockids. Use an invalid value as default
+	 * and get the valid one on taprio_change().
+	 */
+	q->clockid = -1;
+
+	if (sch->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	if (!netif_is_multiqueue(dev))
+		return -EOPNOTSUPP;
+
+	/* pre-allocate qdisc, attachment can't fail */
+	q->qdiscs = kcalloc(dev->num_tx_queues,
+			    sizeof(q->qdiscs[0]),
+			    GFP_KERNEL);
+
+	if (!q->qdiscs)
+		return -ENOMEM;
+
+	if (!opt)
+		return -EINVAL;
+
+	return taprio_change(sch, opt, extack);
+}
+
+static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
+					     unsigned long cl)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx = cl - 1;
+
+	if (ntx >= dev->num_tx_queues)
+		return NULL;
+
+	return netdev_get_tx_queue(dev, ntx);
+}
+
+static int taprio_graft(struct Qdisc *sch, unsigned long cl,
+			struct Qdisc *new, struct Qdisc **old,
+			struct netlink_ext_ack *extack)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+	if (!dev_queue)
+		return -EINVAL;
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+
+	*old = q->qdiscs[cl - 1];
+	q->qdiscs[cl - 1] = new;
+
+	if (new)
+		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+
+	return 0;
+}
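For illustration only (not part of the patch): taprio_queue_get(), taprio_find() and taprio_graft() above all rely on the convention that class minor number i + 1 addresses tx queue i. A sketch of that mapping, with a made-up handle and queue count:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t num_tx_queues = 8;          /* hypothetical device        */
		uint32_t classid = 0x01000003;       /* "100:3" in tc notation     */
		uint32_t minor = classid & 0xffff;   /* what TC_H_MIN() extracts   */

		if (minor == 0 || minor > num_tx_queues)
			printf("classid %#x: no such class\n", classid);
		else
			printf("classid %#x -> tx queue %u\n", classid, minor - 1);
		return 0;
	}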
+
+static int dump_entry(struct sk_buff *msg,
+		      const struct sched_entry *entry)
+{
+	struct nlattr *item;
+
+	item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY);
+	if (!item)
+		return -ENOSPC;
+
+	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
+			entry->gate_mask))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
+			entry->interval))
+		goto nla_put_failure;
+
+	return nla_nest_end(msg, item);
+
+nla_put_failure:
+	nla_nest_cancel(msg, item);
+	return -1;
+}
+
+static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_mqprio_qopt opt = { 0 };
+	struct nlattr *nest, *entry_list;
+	struct sched_entry *entry;
+	unsigned int i;
+
+	opt.num_tc = netdev_get_num_tc(dev);
+	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+
+	for (i = 0; i < netdev_get_num_tc(dev); i++) {
+		opt.count[i] = dev->tc_to_txq[i].count;
+		opt.offset[i] = dev->tc_to_txq[i].offset;
+	}
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		return -ENOSPC;
+
+	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
+		goto options_error;
+
+	if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
+			q->base_time, TCA_TAPRIO_PAD))
+		goto options_error;
+
+	if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
+		goto options_error;
+
+	entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
+	if (!entry_list)
+		goto options_error;
+
+	list_for_each_entry(entry, &q->entries, list) {
+		if (dump_entry(skb, entry) < 0)
+			goto options_error;
+	}
+
+	nla_nest_end(skb, entry_list);
+
+	return nla_nest_end(skb, nest);
+
+options_error:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+	if (!dev_queue)
+		return NULL;
+
+	return dev_queue->qdisc_sleeping;
+}
+
+static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
+{
+	unsigned int ntx = TC_H_MIN(classid);
+
+	if (!taprio_queue_get(sch, ntx))
+		return 0;
+	return ntx;
+}
+
+static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+	tcm->tcm_parent = TC_H_ROOT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+
+	return 0;
+}
+
+static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				   struct gnet_dump *d)
+	__releases(d->lock)
+	__acquires(d->lock)
+{
+	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+	sch = dev_queue->qdisc_sleeping;
+	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
+	    gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
+		return -1;
+	return 0;
+}
+
+static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx;
+
+	if (arg->stop)
+		return;
+
+	arg->count = arg->skip;
+	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
+		if (arg->fn(sch, ntx + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
+						struct tcmsg *tcm)
+{
+	return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
+}
+
+static const struct Qdisc_class_ops taprio_class_ops = {
+	.graft = taprio_graft,
+	.leaf = taprio_leaf,
+	.find = taprio_find,
+	.walk = taprio_walk,
+	.dump = taprio_dump_class,
+	.dump_stats = taprio_dump_class_stats,
+	.select_queue = taprio_select_queue,
+};
+
+static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
+	.cl_ops = &taprio_class_ops,
+	.id = "taprio",
+	.priv_size = sizeof(struct taprio_sched),
+	.init = taprio_init,
+	.destroy = taprio_destroy,
+	.peek = taprio_peek,
+	.dequeue = taprio_dequeue,
+	.enqueue = taprio_enqueue,
+	.dump = taprio_dump,
+	.owner = THIS_MODULE,
+};
+
+static int __init taprio_module_init(void)
+{
+	return register_qdisc(&taprio_qdisc_ops);
+}
+
+static void __exit taprio_module_exit(void)
+{
+	unregister_qdisc(&taprio_qdisc_ops);
+}
+
+module_init(taprio_module_init);
+module_exit(taprio_module_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 6f74a426f159..942dcca09cf2 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -162,7 +162,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
 	nb = 0;
 	while (segs) {
 		nskb = segs->next;
-		segs->next = NULL;
+		skb_mark_not_on_list(segs);
 		qdisc_skb_cb(segs)->pkt_len = segs->len;
 		len += segs->len;
 		ret = qdisc_enqueue(segs, q->qdisc, to_free);
@@ -392,7 +392,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
 	if (child) {
 		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
 					  q->qdisc->qstats.backlog);
-		qdisc_destroy(q->qdisc);
+		qdisc_put(q->qdisc);
 		q->qdisc = child;
 	}
 	q->limit = qopt->limit;
@@ -438,7 +438,7 @@ static void tbf_destroy(struct Qdisc *sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
 	qdisc_watchdog_cancel(&q->watchdog);
-	qdisc_destroy(q->qdisc);
+	qdisc_put(q->qdisc);
 }
 
 static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
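For illustration only (not part of the patch): the tbf_segment() hunk above swaps an open-coded pointer clear for the skb_mark_not_on_list() helper; as far as I understand it, the helper simply detaches the segment by clearing its ->next pointer, sketched here with a stand-in structure:

	#include <stddef.h>
	#include <stdio.h>

	struct segment {                  /* stand-in for struct sk_buff */
		struct segment *next;
	};

	/* What the helper is expected to do: detach a GSO segment from the
	 * singly linked list before it is enqueued as an independent packet.
	 */
	static void mark_not_on_list(struct segment *seg)
	{
		seg->next = NULL;
	}

	int main(void)
	{
		struct segment b = { NULL }, a = { &b };

		mark_not_on_list(&a);
		printf("a.next is %s\n", a.next ? "set" : "NULL");
		return 0;
	}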