33 files changed, 1226 insertions, 507 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f711a471d0b7..d3d7a0a66e28 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -286,6 +286,15 @@ config NET_SCH_FQ
 
 	  If unsure, say N.
 
+config NET_SCH_HHF
+	tristate "Heavy-Hitter Filter (HHF)"
+	help
+	  Say Y here if you want to use the Heavy-Hitter Filter (HHF)
+	  packet scheduling algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_hhf.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 35fa47a494ab..3442e5fbc4d7 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
 obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
 obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
+obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 4adbce8f8314..6f103fd76c17 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -29,25 +29,16 @@
 
 void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 {
-	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
-	struct tcf_common **p1p;
-
-	for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
-		if (*p1p == p) {
-			write_lock_bh(hinfo->lock);
-			*p1p = p->tcfc_next;
-			write_unlock_bh(hinfo->lock);
-			gen_kill_estimator(&p->tcfc_bstats,
-					   &p->tcfc_rate_est);
-			/*
-			 * gen_estimator est_timer() might access p->tcfc_lock
-			 * or bstats, wait a RCU grace period before freeing p
-			 */
-			kfree_rcu(p, tcfc_rcu);
-			return;
-		}
-	}
-	WARN_ON(1);
+	spin_lock_bh(&hinfo->lock);
+	hlist_del(&p->tcfc_head);
+	spin_unlock_bh(&hinfo->lock);
+	gen_kill_estimator(&p->tcfc_bstats,
+			   &p->tcfc_rate_est);
+	/*
+	 * gen_estimator est_timer() might access p->tcfc_lock
+	 * or bstats, wait a RCU grace period before freeing p
+	 */
+	kfree_rcu(p, tcfc_rcu);
 }
 EXPORT_SYMBOL(tcf_hash_destroy);
 
@@ -73,18 +64,19 @@ EXPORT_SYMBOL(tcf_hash_release);
 static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 			   struct tc_action *a, struct tcf_hashinfo *hinfo)
 {
+	struct hlist_head *head;
 	struct tcf_common *p;
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 	struct nlattr *nest;
 
-	read_lock_bh(hinfo->lock);
+	spin_lock_bh(&hinfo->lock);
 
 	s_i = cb->args[0];
 
 	for (i = 0; i < (hinfo->hmask + 1); i++) {
-		p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+		head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
 
-		for (; p; p = p->tcfc_next) {
+		hlist_for_each_entry_rcu(p, head, tcfc_head) {
 			index++;
 			if (index < s_i)
 				continue;
@@ -107,7 +99,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 		}
 	}
 done:
-	read_unlock_bh(hinfo->lock);
+	spin_unlock_bh(&hinfo->lock);
 	if (n_i)
 		cb->args[0] += n_i;
 	return n_i;
@@ -120,7 +112,9 @@ nla_put_failure:
 static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 			  struct tcf_hashinfo *hinfo)
 {
-	struct tcf_common *p, *s_p;
+	struct hlist_head *head;
+	struct hlist_node *n;
+	struct tcf_common *p;
 	struct nlattr *nest;
 	int i = 0, n_i = 0;
 
@@ -130,14 +124,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 	if (nla_put_string(skb, TCA_KIND, a->ops->kind))
 		goto nla_put_failure;
 	for (i = 0; i < (hinfo->hmask + 1); i++) {
-		p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
-
-		while (p != NULL) {
-			s_p = p->tcfc_next;
+		head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
+		hlist_for_each_entry_safe(p, n, head, tcfc_head) {
 			if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
 				module_put(a->ops->owner);
 			n_i++;
-			p = s_p;
 		}
 	}
 	if (nla_put_u32(skb, TCA_FCNT, n_i))
@@ -150,8 +141,8 @@ nla_put_failure:
 	return -EINVAL;
 }
 
-int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
-		       int type, struct tc_action *a)
+static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+			      int type, struct tc_action *a)
 {
 	struct tcf_hashinfo *hinfo = a->ops->hinfo;
 
@@ -164,19 +155,18 @@ int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
 		return -EINVAL;
 	}
 }
-EXPORT_SYMBOL(tcf_generic_walker);
 
 struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
 {
-	struct tcf_common *p;
+	struct tcf_common *p = NULL;
+	struct hlist_head *head;
 
-	read_lock_bh(hinfo->lock);
-	for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
-	     p = p->tcfc_next) {
+	spin_lock_bh(&hinfo->lock);
+	head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
+	hlist_for_each_entry_rcu(p, head, tcfc_head)
 		if (p->tcfc_index == index)
 			break;
-	}
-	read_unlock_bh(hinfo->lock);
+	spin_unlock_bh(&hinfo->lock);
 
 	return p;
 }
@@ -196,7 +186,7 @@ u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo)
 }
 EXPORT_SYMBOL(tcf_hash_new_index);
 
-int tcf_hash_search(struct tc_action *a, u32 index)
+static int tcf_hash_search(struct tc_action *a, u32 index)
 {
 	struct tcf_hashinfo *hinfo = a->ops->hinfo;
 	struct tcf_common *p = tcf_hash_lookup(index, hinfo);
@@ -207,7 +197,6 @@ int tcf_hash_search(struct tc_action *a, u32 index)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(tcf_hash_search);
 
 struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
 				  struct tcf_hashinfo *hinfo)
@@ -236,6 +225,7 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
 		p->tcfc_bindcnt = 1;
 
 	spin_lock_init(&p->tcfc_lock);
+	INIT_HLIST_NODE(&p->tcfc_head);
 	p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
 	p->tcfc_tm.install = jiffies;
 	p->tcfc_tm.lastuse = jiffies;
@@ -257,19 +247,18 @@ void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 {
 	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
 
-	write_lock_bh(hinfo->lock);
-	p->tcfc_next = hinfo->htab[h];
-	hinfo->htab[h] = p;
-	write_unlock_bh(hinfo->lock);
+	spin_lock_bh(&hinfo->lock);
+	hlist_add_head(&p->tcfc_head, &hinfo->htab[h]);
+	spin_unlock_bh(&hinfo->lock);
 }
 EXPORT_SYMBOL(tcf_hash_insert);
 
-static struct tc_action_ops *act_base = NULL;
+static LIST_HEAD(act_base);
 static DEFINE_RWLOCK(act_mod_lock);
 
 int tcf_register_action(struct tc_action_ops *act)
 {
-	struct tc_action_ops *a, **ap;
+	struct tc_action_ops *a;
 
 	/* Must supply act, dump, cleanup and init */
 	if (!act->act || !act->dump || !act->cleanup || !act->init)
@@ -282,14 +271,13 @@ int tcf_register_action(struct tc_action_ops *act)
 		act->walk = tcf_generic_walker;
 
 	write_lock(&act_mod_lock);
-	for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) {
+	list_for_each_entry(a, &act_base, head) {
 		if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
 			write_unlock(&act_mod_lock);
 			return -EEXIST;
 		}
 	}
-	act->next = NULL;
-	*ap = act;
+	list_add_tail(&act->head, &act_base);
 	write_unlock(&act_mod_lock);
 	return 0;
 }
@@ -297,17 +285,16 @@ EXPORT_SYMBOL(tcf_register_action);
 
 int tcf_unregister_action(struct tc_action_ops *act)
 {
-	struct tc_action_ops *a, **ap;
+	struct tc_action_ops *a;
 	int err = -ENOENT;
 
 	write_lock(&act_mod_lock);
-	for (ap = &act_base; (a = *ap) != NULL; ap = &a->next)
-		if (a == act)
+	list_for_each_entry(a, &act_base, head) {
+		if (a == act) {
+			list_del(&act->head);
+			err = 0;
 			break;
-	if (a) {
-		*ap = a->next;
-		a->next = NULL;
-		err = 0;
+		}
 	}
 	write_unlock(&act_mod_lock);
 	return err;
@@ -317,69 +304,42 @@ EXPORT_SYMBOL(tcf_unregister_action);
 /* lookup by name */
 static struct tc_action_ops *tc_lookup_action_n(char *kind)
 {
-	struct tc_action_ops *a = NULL;
+	struct tc_action_ops *a, *res = NULL;
 
 	if (kind) {
 		read_lock(&act_mod_lock);
-		for (a = act_base; a; a = a->next) {
+		list_for_each_entry(a, &act_base, head) {
 			if (strcmp(kind, a->kind) == 0) {
-				if (!try_module_get(a->owner)) {
-					read_unlock(&act_mod_lock);
-					return NULL;
-				}
+				if (try_module_get(a->owner))
+					res = a;
 				break;
 			}
 		}
 		read_unlock(&act_mod_lock);
 	}
-	return a;
+	return res;
 }
 
 /* lookup by nlattr */
 static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
 {
-	struct tc_action_ops *a = NULL;
+	struct tc_action_ops *a, *res = NULL;
 
 	if (kind) {
 		read_lock(&act_mod_lock);
-		for (a = act_base; a; a = a->next) {
+		list_for_each_entry(a, &act_base, head) {
 			if (nla_strcmp(kind, a->kind) == 0) {
-				if (!try_module_get(a->owner)) {
-					read_unlock(&act_mod_lock);
-					return NULL;
-				}
+				if (try_module_get(a->owner))
+					res = a;
 				break;
 			}
 		}
 		read_unlock(&act_mod_lock);
 	}
-	return a;
+	return res;
 }
 
-#if 0
-/* lookup by id */
-static struct tc_action_ops *tc_lookup_action_id(u32 type)
-{
-	struct tc_action_ops *a = NULL;
-
-	if (type) {
-		read_lock(&act_mod_lock);
-		for (a = act_base; a; a = a->next) {
-			if (a->type == type) {
-				if (!try_module_get(a->owner)) {
-					read_unlock(&act_mod_lock);
-					return NULL;
-				}
-				break;
-			}
-		}
-		read_unlock(&act_mod_lock);
-	}
-	return a;
-}
-#endif
-
-int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act,
+int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
 		    struct tcf_result *res)
 {
 	const struct tc_action *a;
@@ -390,7 +350,7 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act,
 		ret = TC_ACT_OK;
 		goto exec_done;
 	}
-	while ((a = act) != NULL) {
+	list_for_each_entry(a, actions, list) {
 repeat:
 		if (a->ops) {
 			ret = a->ops->act(skb, a, res);
@@ -404,27 +364,26 @@ repeat:
 			if (ret != TC_ACT_PIPE)
 				goto exec_done;
 		}
-		act = a->next;
 	}
 exec_done:
 	return ret;
 }
 EXPORT_SYMBOL(tcf_action_exec);
 
-void tcf_action_destroy(struct tc_action *act, int bind)
+void tcf_action_destroy(struct list_head *actions, int bind)
 {
-	struct tc_action *a;
+	struct tc_action *a, *tmp;
 
-	for (a = act; a; a = act) {
+	list_for_each_entry_safe(a, tmp, actions, list) {
 		if (a->ops) {
 			if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
 				module_put(a->ops->owner);
-			act = act->next;
+			list_del(&a->list);
 			kfree(a);
 		} else {
 			/*FIXME: Remove later - catch insertion bugs*/
 			WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n");
-			act = act->next;
+			list_del(&a->list);
 			kfree(a);
 		}
 	}
@@ -470,14 +429,13 @@ nla_put_failure:
 EXPORT_SYMBOL(tcf_action_dump_1);
 
 int
-tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
+tcf_action_dump(struct sk_buff *skb, struct list_head *actions, int bind, int ref)
 {
 	struct tc_action *a;
 	int err = -EINVAL;
 	struct nlattr *nest;
 
-	while ((a = act) != NULL) {
-		act = a->next;
+	list_for_each_entry(a, actions, list) {
 		nest = nla_nest_start(skb, a->order);
 		if (nest == NULL)
 			goto nla_put_failure;
@@ -552,6 +510,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
 	if (a == NULL)
 		goto err_mod;
 
+	INIT_LIST_HEAD(&a->list);
 	/* backward compatibility for policer */
 	if (name == NULL)
 		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
@@ -578,37 +537,33 @@ err_out:
 	return ERR_PTR(err);
 }
 
-struct tc_action *tcf_action_init(struct net *net, struct nlattr *nla,
+int tcf_action_init(struct net *net, struct nlattr *nla,
 				  struct nlattr *est, char *name, int ovr,
-				  int bind)
+				  int bind, struct list_head *actions)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct tc_action *head = NULL, *act, *act_prev = NULL;
+	struct tc_action *act;
 	int err;
 	int i;
 
 	err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
 	if (err < 0)
-		return ERR_PTR(err);
+		return err;
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
 		act = tcf_action_init_1(net, tb[i], est, name, ovr, bind);
-		if (IS_ERR(act))
+		if (IS_ERR(act)) {
+			err = PTR_ERR(act);
 			goto err;
+		}
 		act->order = i;
-
-		if (head == NULL)
-			head = act;
-		else
-			act_prev->next = act;
-		act_prev = act;
+		list_add_tail(&act->list, actions);
 	}
-	return head;
+	return 0;
 
 err:
-	if (head != NULL)
-		tcf_action_destroy(head, bind);
-	return act;
+	tcf_action_destroy(actions, bind);
+	return err;
 }
 
 int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
@@ -637,10 +592,6 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
 	if (err < 0)
 		goto errout;
 
-	if (a->ops != NULL && a->ops->get_stats != NULL)
-		if (a->ops->get_stats(skb, a) < 0)
-			goto errout;
-
 	if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
 	    gnet_stats_copy_rate_est(&d, &h->tcf_bstats,
 				     &h->tcf_rate_est) < 0 ||
@@ -657,7 +608,7 @@ errout:
 }
 
 static int
-tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq,
+tca_get_fill(struct sk_buff *skb, struct list_head *actions, u32 portid, u32 seq,
 	     u16 flags, int event, int bind, int ref)
 {
 	struct tcamsg *t;
@@ -677,7 +628,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq,
 	if (nest == NULL)
 		goto out_nlmsg_trim;
 
-	if (tcf_action_dump(skb, a, bind, ref) < 0)
+	if (tcf_action_dump(skb, actions, bind, ref) < 0)
 		goto out_nlmsg_trim;
 
 	nla_nest_end(skb, nest);
@@ -692,14 +643,14 @@ out_nlmsg_trim:
 
 static int
 act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
-	       struct tc_action *a, int event)
+	       struct list_head *actions, int event)
 {
 	struct sk_buff *skb;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
-	if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -730,6 +681,7 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
 	if (a == NULL)
 		goto err_out;
 
+	INIT_LIST_HEAD(&a->list);
 	err = -EINVAL;
 	a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
 	if (a->ops == NULL)
@@ -749,12 +701,12 @@ err_out:
 	return ERR_PTR(err);
 }
 
-static void cleanup_a(struct tc_action *act)
+static void cleanup_a(struct list_head *actions)
 {
-	struct tc_action *a;
+	struct tc_action *a, *tmp;
 
-	for (a = act; a; a = act) {
-		act = a->next;
+	list_for_each_entry_safe(a, tmp, actions, list) {
+		list_del(&a->list);
 		kfree(a);
 	}
 }
@@ -769,6 +721,7 @@ static struct tc_action *create_a(int i)
 		return NULL;
 	}
 	act->order = i;
+	INIT_LIST_HEAD(&act->list);
 	return act;
 }
 
@@ -856,7 +809,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 {
 	int i, ret;
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct tc_action *head = NULL, *act, *act_prev = NULL;
+	struct tc_action *act;
+	LIST_HEAD(actions);
 
 	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
 	if (ret < 0)
@@ -876,16 +830,11 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 			goto err;
 		}
 		act->order = i;
-
-		if (head == NULL)
-			head = act;
-		else
-			act_prev->next = act;
-		act_prev = act;
+		list_add_tail(&act->list, &actions);
 	}
 
 	if (event == RTM_GETACTION)
-		ret = act_get_notify(net, portid, n, head, event);
+		ret = act_get_notify(net, portid, n, &actions, event);
 	else { /* delete */
 		struct sk_buff *skb;
 
@@ -895,7 +844,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 			goto err;
 		}
 
-		if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event,
+		if (tca_get_fill(skb, &actions, portid, n->nlmsg_seq, 0, event,
 				 0, 1) <= 0) {
 			kfree_skb(skb);
 			ret = -EINVAL;
@@ -903,7 +852,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		}
 
 		/* now do the delete */
-		tcf_action_destroy(head, 0);
+		tcf_action_destroy(&actions, 0);
 		ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 				     n->nlmsg_flags & NLM_F_ECHO);
 		if (ret > 0)
@@ -911,11 +860,11 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		return ret;
 	}
 err:
-	cleanup_a(head);
+	cleanup_a(&actions);
 	return ret;
 }
 
-static int tcf_add_notify(struct net *net, struct tc_action *a,
+static int tcf_add_notify(struct net *net, struct list_head *actions,
 			  u32 portid, u32 seq, int event, u16 flags)
 {
 	struct tcamsg *t;
@@ -943,7 +892,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
 	if (nest == NULL)
 		goto out_kfree_skb;
 
-	if (tcf_action_dump(skb, a, 0, 0) < 0)
+	if (tcf_action_dump(skb, actions, 0, 0) < 0)
 		goto out_kfree_skb;
 
 	nla_nest_end(skb, nest);
@@ -967,26 +916,18 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	       u32 portid, int ovr)
 {
 	int ret = 0;
-	struct tc_action *act;
-	struct tc_action *a;
+	LIST_HEAD(actions);
 	u32 seq = n->nlmsg_seq;
 
-	act = tcf_action_init(net, nla, NULL, NULL, ovr, 0);
-	if (act == NULL)
-		goto done;
-	if (IS_ERR(act)) {
-		ret = PTR_ERR(act);
+	ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
+	if (ret)
 		goto done;
-	}
 
 	/* dump then free all the actions after update; inserted policy
 	 * stays intact
 	 */
-	ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags);
-	for (a = act; a; a = act) {
-		act = a->next;
-		kfree(a);
-	}
+	ret = tcf_add_notify(net, &actions, portid, seq, RTM_NEWACTION, n->nlmsg_flags);
+	cleanup_a(&actions);
 done:
 	return ret;
 }
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 5c5edf56adbd..9cc6717c5f19 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,15 +37,8 @@
 #include <net/tc_act/tc_csum.h>
 
 #define CSUM_TAB_MASK 15
-static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
 static u32 csum_idx_gen;
-static DEFINE_RWLOCK(csum_lock);
-
-static struct tcf_hashinfo csum_hash_info = {
-	.htab	= tcf_csum_ht,
-	.hmask	= CSUM_TAB_MASK,
-	.lock	= &csum_lock,
-};
+static struct tcf_hashinfo csum_hash_info;
 
 static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
 	[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
@@ -593,6 +586,10 @@ MODULE_LICENSE("GPL");
 
 static int __init csum_init_module(void)
 {
+	int err = tcf_hashinfo_init(&csum_hash_info, CSUM_TAB_MASK);
+	if (err)
+		return err;
+
 	return tcf_register_action(&act_csum_ops);
 }
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 5645a4d32abd..dea927343bf4 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -24,15 +24,8 @@
 #include <net/tc_act/tc_gact.h>
 
 #define GACT_TAB_MASK	15
-static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1];
 static u32 gact_idx_gen;
-static DEFINE_RWLOCK(gact_lock);
-
-static struct tcf_hashinfo gact_hash_info = {
-	.htab	=	tcf_gact_ht,
-	.hmask	=	GACT_TAB_MASK,
-	.lock	=	&gact_lock,
-};
+static struct tcf_hashinfo gact_hash_info;
 
 #ifdef CONFIG_GACT_PROB
 static int gact_net_rand(struct tcf_gact *gact)
@@ -215,6 +208,9 @@ MODULE_LICENSE("GPL");
 
 static int __init gact_init_module(void)
 {
+	int err = tcf_hashinfo_init(&gact_hash_info, GACT_TAB_MASK);
+	if (err)
+		return err;
 #ifdef CONFIG_GACT_PROB
 	pr_info("GACT probability on\n");
 #else
@@ -226,6 +222,7 @@ static int __init gact_init_module(void)
 static void __exit gact_cleanup_module(void)
 {
 	tcf_unregister_action(&act_gact_ops);
+	tcf_hashinfo_destroy(&gact_hash_info);
 }
 
 module_init(gact_init_module);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 882a89762f77..e13ecbbfe8c4 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -29,15 +29,8 @@
 
 
 #define IPT_TAB_MASK     15
-static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1];
 static u32 ipt_idx_gen;
-static DEFINE_RWLOCK(ipt_lock);
-
-static struct tcf_hashinfo ipt_hash_info = {
-	.htab	=	tcf_ipt_ht,
-	.hmask	=	IPT_TAB_MASK,
-	.lock	=	&ipt_lock,
-};
+static struct tcf_hashinfo ipt_hash_info;
 
 static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
 {
@@ -320,7 +313,11 @@ MODULE_ALIAS("act_xt");
 
 static int __init ipt_init_module(void)
 {
-	int ret1, ret2;
+	int ret1, ret2, err;
+	err = tcf_hashinfo_init(&ipt_hash_info, IPT_TAB_MASK);
+	if (err)
+		return err;
+
 	ret1 = tcf_register_action(&act_xt_ops);
 	if (ret1 < 0)
 		printk("Failed to load xt action\n");
@@ -328,9 +325,10 @@ static int __init ipt_init_module(void)
 	if (ret2 < 0)
 		printk("Failed to load ipt action\n");
 
-	if (ret1 < 0 && ret2 < 0)
+	if (ret1 < 0 && ret2 < 0) {
+		tcf_hashinfo_destroy(&ipt_hash_info);
 		return ret1;
-	else
+	} else
 		return 0;
 }
 
@@ -338,6 +336,7 @@ static void __exit ipt_cleanup_module(void)
 {
 	tcf_unregister_action(&act_xt_ops);
 	tcf_unregister_action(&act_ipt_ops);
+	tcf_hashinfo_destroy(&ipt_hash_info);
 }
 
 module_init(ipt_init_module);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 252378121ce7..9dbb8cd64cb0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,16 +30,9 @@
 #include <linux/if_arp.h>
 
 #define MIRRED_TAB_MASK     7
-static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
 static u32 mirred_idx_gen;
-static DEFINE_RWLOCK(mirred_lock);
 static LIST_HEAD(mirred_list);
-
-static struct tcf_hashinfo mirred_hash_info = {
-	.htab	=	tcf_mirred_ht,
-	.hmask	=	MIRRED_TAB_MASK,
-	.lock	=	&mirred_lock,
-};
+static struct tcf_hashinfo mirred_hash_info;
 
 static int tcf_mirred_release(struct tcf_mirred *m, int bind)
 {
@@ -261,7 +254,6 @@ static struct notifier_block mirred_device_notifier = {
 	.notifier_call = mirred_device_event,
 };
 
-
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
 	.hinfo		=	&mirred_hash_info,
@@ -284,14 +276,20 @@ static int __init mirred_init_module(void)
 	if (err)
 		return err;
 
+	err = tcf_hashinfo_init(&mirred_hash_info, MIRRED_TAB_MASK);
+	if (err) {
+		unregister_netdevice_notifier(&mirred_device_notifier);
+		return err;
+	}
 	pr_info("Mirror/redirect action on\n");
 	return tcf_register_action(&act_mirred_ops);
 }
 
 static void __exit mirred_cleanup_module(void)
 {
-	unregister_netdevice_notifier(&mirred_device_notifier);
 	tcf_unregister_action(&act_mirred_ops);
+	tcf_hashinfo_destroy(&mirred_hash_info);
+	unregister_netdevice_notifier(&mirred_device_notifier);
 }
 
 module_init(mirred_init_module);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 6a15ace00241..921fea43fca2 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -30,15 +30,9 @@
 
 
 #define NAT_TAB_MASK	15
-static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1];
 static u32 nat_idx_gen;
-static DEFINE_RWLOCK(nat_lock);
 
-static struct tcf_hashinfo nat_hash_info = {
-	.htab	=	tcf_nat_ht,
-	.hmask	=	NAT_TAB_MASK,
-	.lock	=	&nat_lock,
-};
+static struct tcf_hashinfo nat_hash_info;
 
 static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 	[TCA_NAT_PARMS]	= { .len = sizeof(struct tc_nat) },
@@ -316,12 +310,16 @@ MODULE_LICENSE("GPL");
 
 static int __init nat_init_module(void)
 {
+	int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK);
+	if (err)
+		return err;
 	return tcf_register_action(&act_nat_ops);
 }
 
 static void __exit nat_cleanup_module(void)
 {
 	tcf_unregister_action(&act_nat_ops);
+	tcf_hashinfo_destroy(&nat_hash_info);
 }
 
 module_init(nat_init_module);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 03b67674169c..e2520e90a10d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -24,15 +24,9 @@
 #include <net/tc_act/tc_pedit.h>
 
 #define PEDIT_TAB_MASK	15
-static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1];
 static u32 pedit_idx_gen;
-static DEFINE_RWLOCK(pedit_lock);
 
-static struct tcf_hashinfo pedit_hash_info = {
-	.htab	=	tcf_pedit_ht,
-	.hmask	=	PEDIT_TAB_MASK,
-	.lock	=	&pedit_lock,
-};
+static struct tcf_hashinfo pedit_hash_info;
 
 static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
 	[TCA_PEDIT_PARMS]	= { .len = sizeof(struct tc_pedit) },
@@ -252,11 +246,15 @@ MODULE_LICENSE("GPL");
 
 static int __init pedit_init_module(void)
 {
+	int err = tcf_hashinfo_init(&pedit_hash_info, PEDIT_TAB_MASK);
+	if (err)
+		return err;
 	return tcf_register_action(&act_pedit_ops);
 }
 
 static void __exit pedit_cleanup_module(void)
 {
+	tcf_hashinfo_destroy(&pedit_hash_info);
 	tcf_unregister_action(&act_pedit_ops);
 }
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 16a62c36928a..819a9a4d1987 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -41,15 +41,8 @@ struct tcf_police {
 	container_of(pc, struct tcf_police, common)
 
 #define POL_TAB_MASK     15
-static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
 static u32 police_idx_gen;
-static DEFINE_RWLOCK(police_lock);
-
-static struct tcf_hashinfo police_hash_info = {
-	.htab	=	tcf_police_ht,
-	.hmask	=	POL_TAB_MASK,
-	.lock	=	&police_lock,
-};
+static struct tcf_hashinfo police_hash_info;
 
 /* old policer structure from before tc actions */
 struct tc_police_compat {
@@ -67,18 +60,19 @@ struct tc_police_compat {
 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
 			      int type, struct tc_action *a)
 {
+	struct hlist_head *head;
 	struct tcf_common *p;
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 	struct nlattr *nest;
 
-	read_lock_bh(&police_lock);
+	spin_lock_bh(&police_hash_info.lock);
 
 	s_i = cb->args[0];
 
 	for (i = 0; i < (POL_TAB_MASK + 1); i++) {
-		p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
+		head = &police_hash_info.htab[tcf_hash(i, POL_TAB_MASK)];
 
-		for (; p; p = p->tcfc_next) {
+		hlist_for_each_entry_rcu(p, head, tcfc_head) {
 			index++;
 			if (index < s_i)
 				continue;
@@ -101,7 +95,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 		}
 	}
 done:
-	read_unlock_bh(&police_lock);
+	spin_unlock_bh(&police_hash_info.lock);
 	if (n_i)
 		cb->args[0] += n_i;
 	return n_i;
@@ -113,25 +107,16 @@ nla_put_failure:
 
 static void tcf_police_destroy(struct tcf_police *p)
 {
-	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
-	struct tcf_common **p1p;
-
-	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
-		if (*p1p == &p->common) {
-			write_lock_bh(&police_lock);
-			*p1p = p->tcf_next;
-			write_unlock_bh(&police_lock);
-			gen_kill_estimator(&p->tcf_bstats,
-					   &p->tcf_rate_est);
-			/*
-			 * gen_estimator est_timer() might access p->tcf_lock
-			 * or bstats, wait a RCU grace period before freeing p
-			 */
-			kfree_rcu(p, tcf_rcu);
-			return;
-		}
-	}
-	WARN_ON(1);
+	spin_lock_bh(&police_hash_info.lock);
+	hlist_del(&p->tcf_head);
+	spin_unlock_bh(&police_hash_info.lock);
+	gen_kill_estimator(&p->tcf_bstats,
+			   &p->tcf_rate_est);
+	/*
+	 * gen_estimator est_timer() might access p->tcf_lock
+	 * or bstats, wait a RCU grace period before freeing p
+	 */
+	kfree_rcu(p, tcf_rcu);
 }
 
 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -266,10 +251,9 @@ override:
 	police->tcf_index = parm->index ? parm->index :
 		tcf_hash_new_index(&police_idx_gen, &police_hash_info);
 	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
-	write_lock_bh(&police_lock);
-	police->tcf_next = tcf_police_ht[h];
-	tcf_police_ht[h] = &police->common;
-	write_unlock_bh(&police_lock);
+	spin_lock_bh(&police_hash_info.lock);
+	hlist_add_head(&police->tcf_head, &police_hash_info.htab[h]);
+	spin_unlock_bh(&police_hash_info.lock);
 
 	a->priv = police;
 	return ret;
@@ -277,10 +261,8 @@ override:
 failure_unlock:
 	spin_unlock_bh(&police->tcf_lock);
 failure:
-	if (P_tab)
-		qdisc_put_rtab(P_tab);
-	if (R_tab)
-		qdisc_put_rtab(R_tab);
+	qdisc_put_rtab(P_tab);
+	qdisc_put_rtab(R_tab);
 	if (ret == ACT_P_CREATED)
 		kfree(police);
 	return err;
@@ -414,12 +396,19 @@ static struct tc_action_ops act_police_ops = {
 static int __init
 police_init_module(void)
 {
-	return tcf_register_action(&act_police_ops);
+	int err = tcf_hashinfo_init(&police_hash_info, POL_TAB_MASK);
+	if (err)
+		return err;
+	err = tcf_register_action(&act_police_ops);
+	if (err)
+		tcf_hashinfo_destroy(&police_hash_info);
+	return err;
 }
 
 static void __exit
 police_cleanup_module(void)
 {
+	tcf_hashinfo_destroy(&police_hash_info);
 	tcf_unregister_action(&act_police_ops);
 }
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 31157d3e729c..81aebc162e5c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -25,15 +25,8 @@
 #include <net/tc_act/tc_defact.h>
 
 #define SIMP_TAB_MASK     7
-static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1];
 static u32 simp_idx_gen;
-static DEFINE_RWLOCK(simp_lock);
-
-static struct tcf_hashinfo simp_hash_info = {
-	.htab	=	tcf_simp_ht,
-	.hmask	=	SIMP_TAB_MASK,
-	.lock	=	&simp_lock,
-};
+static struct tcf_hashinfo simp_hash_info;
 
 #define SIMP_MAX_DATA	32
 static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
@@ -209,14 +202,23 @@ MODULE_LICENSE("GPL");
 
 static int __init simp_init_module(void)
 {
-	int ret = tcf_register_action(&act_simp_ops);
+	int err, ret;
+	err = tcf_hashinfo_init(&simp_hash_info, SIMP_TAB_MASK);
+	if (err)
+		return err;
+
+	ret = tcf_register_action(&act_simp_ops);
 	if (!ret)
 		pr_info("Simple TC action Loaded\n");
+	else
+		tcf_hashinfo_destroy(&simp_hash_info);
+
 	return ret;
 }
 
 static void __exit simp_cleanup_module(void)
 {
+	tcf_hashinfo_destroy(&simp_hash_info);
 	tcf_unregister_action(&act_simp_ops);
 }
 
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cf20add1c3ff..aa0a4c056f31 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -28,15 +28,8 @@
 #include <net/tc_act/tc_skbedit.h>
 
 #define SKBEDIT_TAB_MASK     15
-static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
 static u32 skbedit_idx_gen;
-static DEFINE_RWLOCK(skbedit_lock);
-
-static struct tcf_hashinfo skbedit_hash_info = {
-	.htab	=	tcf_skbedit_ht,
-	.hmask	=	SKBEDIT_TAB_MASK,
-	.lock	=	&skbedit_lock,
-};
+static struct tcf_hashinfo skbedit_hash_info;
 
 static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 		       struct tcf_result *res)
@@ -210,11 +203,15 @@ MODULE_LICENSE("GPL");
 
 static int __init skbedit_init_module(void)
 {
+	int err = tcf_hashinfo_init(&skbedit_hash_info, SKBEDIT_TAB_MASK);
+	if (err)
+		return err;
 	return tcf_register_action(&act_skbedit_ops);
 }
 
 static void __exit skbedit_cleanup_module(void)
 {
+	tcf_hashinfo_destroy(&skbedit_hash_info);
 	tcf_unregister_action(&act_skbedit_ops);
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8e118af90973..12e882ef596b 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,8 +31,7 @@
 #include <net/pkt_cls.h>
 
 /* The list of all installed classifier types */
-
-static struct tcf_proto_ops *tcf_proto_base __read_mostly;
+static LIST_HEAD(tcf_proto_base);
 
 /* Protects list of registered TC modules. It is pure SMP lock. */
 static DEFINE_RWLOCK(cls_mod_lock);
@@ -41,36 +40,35 @@ static DEFINE_RWLOCK(cls_mod_lock);
 
 static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
 {
-	const struct tcf_proto_ops *t = NULL;
+	const struct tcf_proto_ops *t, *res = NULL;
 
 	if (kind) {
 		read_lock(&cls_mod_lock);
-		for (t = tcf_proto_base; t; t = t->next) {
+		list_for_each_entry(t, &tcf_proto_base, head) {
 			if (nla_strcmp(kind, t->kind) == 0) {
-				if (!try_module_get(t->owner))
-					t = NULL;
+				if (try_module_get(t->owner))
+					res = t;
 				break;
 			}
 		}
 		read_unlock(&cls_mod_lock);
 	}
-	return t;
+	return res;
 }
 
 /* Register(unregister) new classifier type */
 
 int register_tcf_proto_ops(struct tcf_proto_ops *ops)
 {
-	struct tcf_proto_ops *t, **tp;
+	struct tcf_proto_ops *t;
 	int rc = -EEXIST;
 
 	write_lock(&cls_mod_lock);
-	for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
+	list_for_each_entry(t, &tcf_proto_base, head)
 		if (!strcmp(ops->kind, t->kind))
 			goto out;
 
-	ops->next = NULL;
-	*tp = ops;
+	list_add_tail(&ops->head, &tcf_proto_base);
 	rc = 0;
 out:
 	write_unlock(&cls_mod_lock);
@@ -80,19 +78,17 @@ EXPORT_SYMBOL(register_tcf_proto_ops);
 
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 {
-	struct tcf_proto_ops *t, **tp;
+	struct tcf_proto_ops *t;
 	int rc = -ENOENT;
 
 	write_lock(&cls_mod_lock);
-	for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
-		if (t == ops)
+	list_for_each_entry(t, &tcf_proto_base, head) {
+		if (t == ops) {
+			list_del(&t->head);
+			rc = 0;
 			break;
-
-	if (!t)
-		goto out;
-	*tp = t->next;
-	rc = 0;
-out:
+		}
+	}
 	write_unlock(&cls_mod_lock);
 	return rc;
 }
@@ -500,46 +496,41 @@ out:
 void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (exts->action) {
-		tcf_action_destroy(exts->action, TCA_ACT_UNBIND);
-		exts->action = NULL;
-	}
+	tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
+	INIT_LIST_HEAD(&exts->actions);
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_destroy);
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
-		  struct nlattr *rate_tlv, struct tcf_exts *exts,
-		  const struct tcf_ext_map *map)
+		  struct nlattr *rate_tlv, struct tcf_exts *exts)
 {
-	memset(exts, 0, sizeof(*exts));
-
 #ifdef CONFIG_NET_CLS_ACT
 	{
 		struct tc_action *act;
 
-		if (map->police && tb[map->police]) {
-			act = tcf_action_init_1(net, tb[map->police], rate_tlv,
+		INIT_LIST_HEAD(&exts->actions);
+		if (exts->police && tb[exts->police]) {
+			act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
 						"police", TCA_ACT_NOREPLACE,
 						TCA_ACT_BIND);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
-			act->type = TCA_OLD_COMPAT;
-			exts->action = act;
-		} else if (map->action && tb[map->action]) {
-			act = tcf_action_init(net, tb[map->action], rate_tlv,
+			act->type = exts->type = TCA_OLD_COMPAT;
+			list_add(&act->list, &exts->actions);
+		} else if (exts->action && tb[exts->action]) {
+			int err;
+			err = tcf_action_init(net, tb[exts->action], rate_tlv,
 					      NULL, TCA_ACT_NOREPLACE,
-					      TCA_ACT_BIND);
-			if (IS_ERR(act))
-				return PTR_ERR(act);
-
-			exts->action = act;
+					      TCA_ACT_BIND, &exts->actions);
+			if (err)
+				return err;
 		}
 	}
 #else
-	if ((map->action && tb[map->action]) ||
-	    (map->police && tb[map->police]))
+	if ((exts->action && tb[exts->action]) ||
+	    (exts->police && tb[exts->police]))
 		return -EOPNOTSUPP;
 #endif
 
@@ -551,43 +542,44 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 		     struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (src->action) {
-		struct tc_action *act;
+	if (!list_empty(&src->actions)) {
+		LIST_HEAD(tmp);
 		tcf_tree_lock(tp);
-		act = dst->action;
-		dst->action = src->action;
+		list_splice_init(&dst->actions, &tmp);
+		list_splice(&src->actions, &dst->actions);
 		tcf_tree_unlock(tp);
-		if (act)
-			tcf_action_destroy(act, TCA_ACT_UNBIND);
+		tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
 	}
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_change);
 
-int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
-		  const struct tcf_ext_map *map)
+#define tcf_exts_first_act(ext) \
+		list_first_entry(&(exts)->actions, struct tc_action, list)
+
+int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (map->action && exts->action) {
+	if (exts->action && !list_empty(&exts->actions)) {
 		/*
 		 * again for backward compatible mode - we want
 		 * to work with both old and new modes of entering
 		 * tc data even if iproute2  was newer - jhs
 		 */
 		struct nlattr *nest;
-
-		if (exts->action->type != TCA_OLD_COMPAT) {
-			nest = nla_nest_start(skb, map->action);
+		if (exts->type != TCA_OLD_COMPAT) {
+			nest = nla_nest_start(skb, exts->action);
 			if (nest == NULL)
 				goto nla_put_failure;
-			if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
+			if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0)
 				goto nla_put_failure;
 			nla_nest_end(skb, nest);
-		} else if (map->police) {
-			nest = nla_nest_start(skb, map->police);
+		} else if (exts->police) {
+			struct tc_action *act = tcf_exts_first_act(exts);
+			nest = nla_nest_start(skb, exts->police);
 			if (nest == NULL)
 				goto nla_put_failure;
-			if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
+			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
 				goto nla_put_failure;
 			nla_nest_end(skb, nest);
 		}
@@ -600,17 +592,14 @@ nla_put_failure: __attribute__ ((unused))
 EXPORT_SYMBOL(tcf_exts_dump);
 
 
-int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
-			const struct tcf_ext_map *map)
+int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (exts->action)
-		if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
-			goto nla_put_failure;
+	struct tc_action *a = tcf_exts_first_act(exts);
+	if (tcf_action_copy_stats(skb, a, 1) < 0)
+		return -1;
 #endif
 	return 0;
-nla_put_failure: __attribute__ ((unused))
-	return -1;
 }
 EXPORT_SYMBOL(tcf_exts_dump_stats);
 
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 636d9131d870..b6552035d1f4 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -34,11 +34,6 @@ struct basic_filter {
 	struct list_head	link;
 };
 
-static const struct tcf_ext_map basic_ext_map = {
-	.action = TCA_BASIC_ACT,
-	.police = TCA_BASIC_POLICE
-};
-
 static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			  struct tcf_result *res)
 {
@@ -141,7 +136,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 	struct tcf_exts e;
 	struct tcf_ematch_tree t;
 
-	err = tcf_exts_validate(net, tp, tb, est, &e, &basic_ext_map);
+	tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &e);
 	if (err < 0)
 		return err;
 
@@ -191,6 +187,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 	if (f == NULL)
 		goto errout;
 
+	tcf_exts_init(&f->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
 	err = -EINVAL;
 	if (handle)
 		f->handle = handle;
@@ -263,13 +260,13 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 	    nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
 		goto nla_put_failure;
 
-	if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
+	if (tcf_exts_dump(skb, &f->exts) < 0 ||
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
-	if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
+	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
 		goto nla_put_failure;
 
 	return skb->len;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index d7c72be121f3..00a5a585e5f1 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -46,11 +46,6 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
 };
 
-static const struct tcf_ext_map bpf_ext_map = {
-	.action = TCA_BPF_ACT,
-	.police = TCA_BPF_POLICE,
-};
-
 static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			    struct tcf_result *res)
 {
@@ -174,7 +169,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
 		return -EINVAL;
 
-	ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map);
+	tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+	ret = tcf_exts_validate(net, tp, tb, est, &exts);
 	if (ret < 0)
 		return ret;
 
@@ -271,6 +267,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 	if (prog == NULL)
 		return -ENOBUFS;
 
+	tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
 	if (handle == 0)
 		prog->handle = cls_bpf_grab_new_handle(tp, head);
 	else
@@ -325,12 +322,12 @@ static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh,
 
 	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
 
-	if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0)
+	if (tcf_exts_dump(skb, &prog->exts) < 0)
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
-	if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0)
+	if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
 		goto nla_put_failure;
 
 	return skb->len;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 838fa40abad1..8349fcdc50f3 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -76,11 +76,6 @@ static int cls_cgroup_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static const struct tcf_ext_map cgroup_ext_map = {
-	.action = TCA_CGROUP_ACT,
-	.police = TCA_CGROUP_POLICE,
-};
-
 static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
 	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
 };
@@ -107,6 +102,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 		if (head == NULL)
 			return -ENOBUFS;
 
+		tcf_exts_init(&head->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
 		head->handle = handle;
 
 		tcf_tree_lock(tp);
@@ -122,8 +118,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		return err;
 
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e,
-				&cgroup_ext_map);
+	tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
 	if (err < 0)
 		return err;
 
@@ -181,13 +177,13 @@ static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
+	if (tcf_exts_dump(skb, &head->exts) < 0 ||
 	    tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
-	if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
+	if (tcf_exts_dump_stats(skb, &head->exts) < 0)
 		goto nla_put_failure;
 
 	return skb->len;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 7881e2fccbc2..dfd18a5c3e81 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -56,11 +56,6 @@ struct flow_filter {
 	u32			hashrnd;
 };
 
-static const struct tcf_ext_map flow_ext_map = {
-	.action	= TCA_FLOW_ACT,
-	.police	= TCA_FLOW_POLICE,
-};
-
 static inline u32 addr_fold(void *addr)
 {
 	unsigned long a = (unsigned long)addr;
@@ -220,7 +215,7 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
 
 static u32 flow_get_rxhash(struct sk_buff *skb)
 {
-	return skb_get_rxhash(skb);
+	return skb_get_hash(skb);
 }
 
 static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
@@ -397,7 +392,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 			return -EOPNOTSUPP;
 	}
 
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
+	tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
 	if (err < 0)
 		return err;
 
@@ -455,6 +451,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 
 		f->handle = handle;
 		f->mask	  = ~0U;
+		tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
 
 		get_random_bytes(&f->hashrnd, 4);
 		f->perturb_timer.function = flow_perturbation;
@@ -608,7 +605,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh,
 	    nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
 		goto nla_put_failure;
 
-	if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
+	if (tcf_exts_dump(skb, &f->exts) < 0)
 		goto nla_put_failure;
 #ifdef CONFIG_NET_EMATCH
 	if (f->ematches.hdr.nmatches &&
@@ -617,7 +614,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh,
 #endif
 	nla_nest_end(skb, nest);
 
-	if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
+	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
 		goto nla_put_failure;
 
 	return skb->len;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 9b97172db84a..3f9cece13807 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -46,11 +46,6 @@ struct fw_filter {
 	struct tcf_exts		exts;
 };
 
-static const struct tcf_ext_map fw_ext_map = {
-	.action = TCA_FW_ACT,
-	.police = TCA_FW_POLICE
-};
-
 static inline int fw_hash(u32 handle)
 {
 	if (HTSIZE == 4096)
@@ -200,7 +195,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
 	u32 mask;
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &fw_ext_map);
+	tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
 	if (err < 0)
 		return err;
 
@@ -280,6 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 	if (f == NULL)
 		return -ENOBUFS;
 
+	tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
 	f->id = handle;
 
 	err = fw_change_attrs(net, tp, f, tb, tca, base);
@@ -359,12 +356,12 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	    nla_put_u32(skb, TCA_FW_MASK, head->mask))
 		goto nla_put_failure;
 
-	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
+	if (tcf_exts_dump(skb, &f->exts) < 0)
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
-	if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
+	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
 		goto nla_put_failure;
 
 	return skb->len;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 37da567d833e..2473953a5948 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -59,11 +59,6 @@ struct route4_filter {
 
 #define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
 
-static const struct tcf_ext_map route_ext_map = {
-	.police = TCA_ROUTE4_POLICE,
-	.action = TCA_ROUTE4_ACT
-};
-
 static inline int route4_fastmap_hash(u32 id, int iif)
 {
 	return id & 0xF;
@@ -347,7 +342,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 	struct route4_bucket *b;
 	struct tcf_exts e;
 
-	err = tcf_exts_validate(net, tp, tb, est, &e, &route_ext_map);
+	tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &e);
 	if (err < 0)
 		return err;
 
@@ -481,6 +477,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 	if (f == NULL)
 		goto errout;
 
+	tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
 	err = route4_set_parms(net, tp, base, f, handle, head, tb,
 		tca[TCA_RATE], 1);
 	if (err < 0)
@@ -589,12 +586,12 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 	    nla_put_u32(skb, TCA_ROUTE4_CLASSID, f->res.classid))
 		goto nla_put_failure;
 
-	if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
+	if (tcf_exts_dump(skb, &f->exts) < 0)
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
-	if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
+	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
 		goto nla_put_failure;
 
 	return skb->len;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 252d8b05872e..4f25c2ac825b 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -116,11 +116,6 @@ static inline unsigned int hash_src(__be32 *src)
 	return h & 0xF;
 }
 
-static struct tcf_ext_map rsvp_ext_map = {
-	.police = TCA_RSVP_POLICE,
-	.action = TCA_RSVP_ACT
-};
-
 #define RSVP_APPLY_RESULT()				\
 {							\
 	int r = tcf_exts_exec(skb, &f->exts, res);	\
@@ -440,7 +435,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		return err;
 
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
+	tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
 	if (err < 0)
 		return err;
 
@@ -471,6 +467,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 	if (f == NULL)
 		goto errout2;
 
+	tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
 	h2 = 16;
 	if (tb[TCA_RSVP_SRC]) {
 		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
@@ -633,12 +630,12 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	    nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
 		goto nla_put_failure;
 
-	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
+	if (tcf_exts_dump(skb, &f->exts) < 0)
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
-	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
+	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
 		goto nla_put_failure;
 	return skb->len;
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index b86535a40169..ffad18791c93 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -50,11 +50,6 @@ struct tcindex_data {
 	int fall_through;	/* 0: only classify if explicit match */
 };
 
-static const struct tcf_ext_map tcindex_ext_map = {
-	.police = TCA_TCINDEX_POLICE,
-	.action = TCA_TCINDEX_ACT
-};
-
 static inline int
 tcindex_filter_is_set(struct tcindex_filter_result *r)
 {
@@ -209,17 +204,21 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	struct tcindex_filter *f = NULL; /* make gcc behave */
 	struct tcf_exts e;
 
-	err = tcf_exts_validate(net, tp, tb, est, &e, &tcindex_ext_map);
+	tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &e);
 	if (err < 0)
 		return err;
 
 	memcpy(&cp, p, sizeof(cp));
 	memset(&new_filter_result, 0, sizeof(new_filter_result));
+	tcf_exts_init(&new_filter_result.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 
 	if (old_r)
 		memcpy(&cr, r, sizeof(cr));
-	else
+	else {
 		memset(&cr, 0, sizeof(cr));
+		tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+	}
 
 	if (tb[TCA_TCINDEX_HASH])
 		cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -468,11 +467,11 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 		    nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
 			goto nla_put_failure;
 
-		if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
+		if (tcf_exts_dump(skb, &r->exts) < 0)
 			goto nla_put_failure;
 		nla_nest_end(skb, nest);
 
-		if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
+		if (tcf_exts_dump_stats(skb, &r->exts) < 0)
 			goto nla_put_failure;
 	}
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 59e546c2ac98..20f2fb79c747 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -79,11 +79,6 @@ struct tc_u_common {
 	u32			hgenerator;
 };
 
-static const struct tcf_ext_map u32_ext_map = {
-	.action = TCA_U32_ACT,
-	.police = TCA_U32_POLICE
-};
-
 static inline unsigned int u32_hash_fold(__be32 key,
 					 const struct tc_u32_sel *sel,
 					 u8 fshift)
@@ -496,7 +491,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 	int err;
 	struct tcf_exts e;
 
-	err = tcf_exts_validate(net, tp, tb, est, &e, &u32_ext_map);
+	tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &e);
 	if (err < 0)
 		return err;
 
@@ -646,6 +642,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 	n->ht_up = ht;
 	n->handle = handle;
 	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
+	tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
 
 #ifdef CONFIG_CLS_U32_MARK
 	if (tb[TCA_U32_MARK]) {
@@ -759,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 			goto nla_put_failure;
 #endif
 
-		if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
+		if (tcf_exts_dump(skb, &n->exts) < 0)
 			goto nla_put_failure;
 
 #ifdef CONFIG_NET_CLS_IND
@@ -778,7 +775,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 	nla_nest_end(skb, nest);
 
 	if (TC_U32_KEY(n->handle))
-		if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
+		if (tcf_exts_dump_stats(skb, &n->exts) < 0)
 			goto nla_put_failure;
 	return skb->len;
 
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 382519a5d7f9..9b8c0b0e60d7 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -222,7 +222,7 @@ META_COLLECTOR(int_maclen)
 
 META_COLLECTOR(int_rxhash)
 {
-	dst->value = skb_get_rxhash(skb);
+	dst->value = skb_get_hash(skb);
 }
 
 /**************************************************************************
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 547b4a88ae2a..1313145e3b86 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -135,7 +135,7 @@ static DEFINE_RWLOCK(qdisc_mod_lock);
 
 static struct Qdisc_ops *qdisc_base;
 
-/* Register/uregister queueing discipline */
+/* Register/unregister queueing discipline */
 
 int register_qdisc(struct Qdisc_ops *qops)
 {
@@ -273,8 +273,11 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
 
 void qdisc_list_add(struct Qdisc *q)
 {
+	struct Qdisc *root = qdisc_dev(q)->qdisc;
+
+	WARN_ON_ONCE(root == &noop_qdisc);
 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
-		list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
+		list_add_tail(&q->list, &root->list);
 }
 EXPORT_SYMBOL(qdisc_list_add);
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d5a8a4b2454f..2f80d01d42a6 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1060,8 +1060,8 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
 			}
 			if (cl->quantum <= 0 ||
 			    cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
-				pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
-					   cl->common.classid, cl->quantum);
+				pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+					cl->common.classid, cl->quantum);
 				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
 			}
 		}
@@ -1783,8 +1783,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 						    qdisc_root_sleeping_lock(sch),
 						    tca[TCA_RATE]);
 			if (err) {
-				if (rtab)
-					qdisc_put_rtab(rtab);
+				qdisc_put_rtab(rtab);
 				return err;
 			}
 		}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0952fd2684e4..49d6ef338b55 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -57,8 +57,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
-	pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
-		sch, p, new, old);
+	pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n",
+		 __func__, sch, p, new, old);
 
 	if (new == NULL) {
 		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
@@ -85,8 +85,8 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
 
 static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
 {
-	pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
-		sch, qdisc_priv(sch), classid);
+	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
+		 __func__, sch, qdisc_priv(sch), classid);
 
 	return TC_H_MIN(classid) + 1;
 }
@@ -118,8 +118,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
 	int err = -EINVAL;
 	u8 mask = 0;
 
-	pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
-		"arg 0x%lx\n", sch, p, classid, parent, *arg);
+	pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
+		 __func__, sch, p, classid, parent, *arg);
 
 	if (!dsmark_valid_index(p, *arg)) {
 		err = -ENOENT;
@@ -166,7 +166,8 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	int i;
 
-	pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+	pr_debug("%s(sch %p,[qdisc %p],walker %p)\n",
+		 __func__, sch, p, walker);
 
 	if (walker->stop)
 		return;
@@ -199,7 +200,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	int err;
 
-	pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+	pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
 
 	if (p->set_tc_index) {
 		switch (skb->protocol) {
@@ -275,7 +276,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 	struct sk_buff *skb;
 	u32 index;
 
-	pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
 
 	skb = p->q->ops->dequeue(p->q);
 	if (skb == NULL)
@@ -303,8 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 		 * and don't need yet another qdisc as a bypass.
 		 */
 		if (p->mask[index] != 0xff || p->value[index])
-			pr_warning("dsmark_dequeue: unsupported protocol %d\n",
-				   ntohs(skb->protocol));
+			pr_warn("%s: unsupported protocol %d\n",
+				__func__, ntohs(skb->protocol));
 		break;
 	}
 
@@ -315,7 +316,7 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
-	pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
 
 	return p->q->ops->peek(p->q);
 }
@@ -325,7 +326,7 @@ static unsigned int dsmark_drop(struct Qdisc *sch)
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	unsigned int len;
 
-	pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
 
 	if (p->q->ops->drop == NULL)
 		return 0;
@@ -346,7 +347,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 	u16 indices;
 	u8 *mask;
 
-	pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+	pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
 
 	if (!opt)
 		goto errout;
@@ -384,7 +385,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 	if (p->q == NULL)
 		p->q = &noop_qdisc;
 
-	pr_debug("dsmark_init: qdisc %p\n", p->q);
+	pr_debug("%s: qdisc %p\n", __func__, p->q);
 
 	err = 0;
 errout:
@@ -395,7 +396,7 @@ static void dsmark_reset(struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
-	pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
 	qdisc_reset(p->q);
 	sch->q.qlen = 0;
 }
@@ -404,7 +405,7 @@ static void dsmark_destroy(struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
-	pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
+	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
 
 	tcf_destroy_chain(&p->filter_list);
 	qdisc_destroy(p->q);
@@ -417,7 +418,7 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 	struct nlattr *opts = NULL;
 
-	pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
+	pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl);
 
 	if (!dsmark_valid_index(p, cl))
 		return -EINVAL;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 95d843961907..08ef7a42c0e4 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -47,6 +47,7 @@
 #include <linux/rbtree.h>
 #include <linux/hash.h>
 #include <linux/prefetch.h>
+#include <linux/vmalloc.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/sock.h>
@@ -225,7 +226,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 		/* By forcing low order bit to 1, we make sure to not
 		 * collide with a local flow (socket pointers are word aligned)
 		 */
-		sk = (struct sock *)(skb_get_rxhash(skb) | 1L);
+		sk = (struct sock *)(skb_get_hash(skb) | 1L);
 	}
 
 	root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
@@ -578,15 +579,36 @@ static void fq_rehash(struct fq_sched_data *q,
 	q->stat_gc_flows += fcnt;
 }
 
-static int fq_resize(struct fq_sched_data *q, u32 log)
+static void *fq_alloc_node(size_t sz, int node)
 {
+	void *ptr;
+
+	ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node);
+	if (!ptr)
+		ptr = vmalloc_node(sz, node);
+	return ptr;
+}
+
+static void fq_free(void *addr)
+{
+	if (addr && is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
+static int fq_resize(struct Qdisc *sch, u32 log)
+{
+	struct fq_sched_data *q = qdisc_priv(sch);
 	struct rb_root *array;
 	u32 idx;
 
 	if (q->fq_root && log == q->fq_trees_log)
 		return 0;
 
-	array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL);
+	/* If XPS was setup, we can allocate memory on right NUMA node */
+	array = fq_alloc_node(sizeof(struct rb_root) << log,
+			      netdev_queue_numa_node_read(sch->dev_queue));
 	if (!array)
 		return -ENOMEM;
 
@@ -595,7 +617,7 @@ static int fq_resize(struct fq_sched_data *q, u32 log)
 
 	if (q->fq_root) {
 		fq_rehash(q, q->fq_root, q->fq_trees_log, array, log);
-		kfree(q->fq_root);
+		fq_free(q->fq_root);
 	}
 	q->fq_root = array;
 	q->fq_trees_log = log;
@@ -676,7 +698,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	if (!err)
-		err = fq_resize(q, fq_log);
+		err = fq_resize(sch, fq_log);
 
 	while (sch->q.qlen > sch->limit) {
 		struct sk_buff *skb = fq_dequeue(sch);
@@ -697,7 +719,7 @@ static void fq_destroy(struct Qdisc *sch)
 	struct fq_sched_data *q = qdisc_priv(sch);
 
 	fq_reset(sch);
-	kfree(q->fq_root);
+	fq_free(q->fq_root);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
@@ -723,7 +745,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (opt)
 		err = fq_change(sch, opt);
 	else
-		err = fq_resize(q, q->fq_trees_log);
+		err = fq_resize(sch, q->fq_trees_log);
 
 	return err;
 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6a91d7d48ade..32bb942d2faa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -718,8 +718,8 @@ static void attach_default_qdiscs(struct net_device *dev)
 	} else {
 		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
 		if (qdisc) {
-			qdisc->ops->attach(qdisc);
 			dev->qdisc = qdisc;
+			qdisc->ops->attach(qdisc);
 		}
 	}
 }
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index d42234c0f13b..12cbc09157fc 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -370,8 +370,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 
 	for (i = table->DPs; i < MAX_DPs; i++) {
 		if (table->tab[i]) {
-			pr_warning("GRED: Warning: Destroying "
-				   "shadowed VQ 0x%x\n", i);
+			pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
+				i);
 			gred_destroy_vq(table->tab[i]);
 			table->tab[i] = NULL;
 		}
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
new file mode 100644
index 000000000000..cf7f614e841b
--- /dev/null
+++ b/net/sched/sch_hhf.c
@@ -0,0 +1,745 @@
+/* net/sched/sch_hhf.c		Heavy-Hitter Filter (HHF)
+ *
+ * Copyright (C) 2013 Terry Lam <vtlam@google.com>
+ * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com>
+ */
+
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <net/flow_keys.h>
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+/*	Heavy-Hitter Filter (HHF)
+ *
+ * Principles :
+ * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter
+ * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified
+ * as heavy-hitter, it is immediately switched to the heavy-hitter bucket.
+ * The buckets are dequeued by a Weighted Deficit Round Robin (WDRR) scheduler,
+ * in which the heavy-hitter bucket is served with less weight.
+ * In other words, non-heavy-hitters (e.g., short bursts of critical traffic)
+ * are isolated from heavy-hitters (e.g., persistent bulk traffic) and also have
+ * higher share of bandwidth.
+ *
+ * To capture heavy-hitters, we use the "multi-stage filter" algorithm in the
+ * following paper:
+ * [EV02] C. Estan and G. Varghese, "New Directions in Traffic Measurement and
+ * Accounting", in ACM SIGCOMM, 2002.
+ *
+ * Conceptually, a multi-stage filter comprises k independent hash functions
+ * and k counter arrays. Packets are indexed into k counter arrays by k hash
+ * functions, respectively. The counters are then increased by the packet sizes.
+ * Therefore,
+ *    - For a heavy-hitter flow: *all* of its k array counters must be large.
+ *    - For a non-heavy-hitter flow: some of its k array counters can be large
+ *      due to hash collision with other small flows; however, with high
+ *      probability, not *all* k counters are large.
+ *
+ * By the design of the multi-stage filter algorithm, the false negative rate
+ * (heavy-hitters getting away uncaptured) is zero. However, the algorithm is
+ * susceptible to false positives (non-heavy-hitters mistakenly classified as
+ * heavy-hitters).
+ * Therefore, we also implement the following optimizations to reduce false
+ * positives by avoiding unnecessary increment of the counter values:
+ *    - Optimization O1: once a heavy-hitter is identified, its bytes are not
+ *        accounted in the array counters. This technique is called "shielding"
+ *        in Section 3.3.1 of [EV02].
+ *    - Optimization O2: conservative update of counters
+ *                       (Section 3.3.2 of [EV02]),
+ *        New counter value = max {old counter value,
+ *                                 smallest counter value + packet bytes}
+ *
+ * Finally, we refresh the counters periodically since otherwise the counter
+ * values will keep accumulating.
+ *
+ * Once a flow is classified as heavy-hitter, we also save its per-flow state
+ * in an exact-matching flow table so that its subsequent packets can be
+ * dispatched to the heavy-hitter bucket accordingly.
+ *
+ *
+ * At a high level, this qdisc works as follows:
+ * Given a packet p:
+ *   - If the flow-id of p (e.g., TCP 5-tuple) is already in the exact-matching
+ *     heavy-hitter flow table, denoted table T, then send p to the heavy-hitter
+ *     bucket.
+ *   - Otherwise, forward p to the multi-stage filter, denoted filter F
+ *        + If F decides that p belongs to a non-heavy-hitter flow, then send p
+ *          to the non-heavy-hitter bucket.
+ *        + Otherwise, if F decides that p belongs to a new heavy-hitter flow,
+ *          then set up a new flow entry for the flow-id of p in the table T and
+ *          send p to the heavy-hitter bucket.
+ *
+ * In this implementation:
+ *   - T is a fixed-size hash-table with 1024 entries. Hash collision is
+ *     resolved by linked-list chaining.
+ *   - F has four counter arrays, each array containing 1024 32-bit counters.
+ *     That means 4 * 1024 * 32 bits = 16KB of memory.
+ *   - Since each array in F contains 1024 counters, 10 bits are sufficient to
+ *     index into each array.
+ *     Hence, instead of having four hash functions, we chop the 32-bit
+ *     skb-hash into three 10-bit chunks, and the remaining 10-bit chunk is
+ *     computed as XOR sum of those three chunks.
+ *   - We need to clear the counter arrays periodically; however, directly
+ *     memsetting 16KB of memory can lead to cache eviction and unwanted delay.
+ *     So by representing each counter by a valid bit, we only need to reset
+ *     4K of 1 bit (i.e. 512 bytes) instead of 16KB of memory.
+ *   - The Deficit Round Robin engine is taken from fq_codel implementation
+ *     (net/sched/sch_fq_codel.c). Note that wdrr_bucket corresponds to
+ *     fq_codel_flow in fq_codel implementation.
+ *
+ */
+
+/* Non-configurable parameters */
+#define HH_FLOWS_CNT	 1024  /* number of entries in exact-matching table T */
+#define HHF_ARRAYS_CNT	 4     /* number of arrays in multi-stage filter F */
+#define HHF_ARRAYS_LEN	 1024  /* number of counters in each array of F */
+#define HHF_BIT_MASK_LEN 10    /* masking 10 bits */
+#define HHF_BIT_MASK	 0x3FF /* bitmask of 10 bits */
+
+#define WDRR_BUCKET_CNT  2     /* two buckets for Weighted DRR */
+enum wdrr_bucket_idx {
+	WDRR_BUCKET_FOR_HH	= 0, /* bucket id for heavy-hitters */
+	WDRR_BUCKET_FOR_NON_HH	= 1  /* bucket id for non-heavy-hitters */
+};
+
+#define hhf_time_before(a, b)	\
+	(typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0))
+
+/* Heavy-hitter per-flow state */
+struct hh_flow_state {
+	u32		 hash_id;	/* hash of flow-id (e.g. TCP 5-tuple) */
+	u32		 hit_timestamp;	/* last time heavy-hitter was seen */
+	struct list_head flowchain;	/* chaining under hash collision */
+};
+
+/* Weighted Deficit Round Robin (WDRR) scheduler */
+struct wdrr_bucket {
+	struct sk_buff	  *head;
+	struct sk_buff	  *tail;
+	struct list_head  bucketchain;
+	int		  deficit;
+};
+
+struct hhf_sched_data {
+	struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
+	u32		   perturbation;   /* hash perturbation */
+	u32		   quantum;        /* psched_mtu(qdisc_dev(sch)); */
+	u32		   drop_overlimit; /* number of times max qdisc packet
+					    * limit was hit
+					    */
+	struct list_head   *hh_flows;       /* table T (currently active HHs) */
+	u32		   hh_flows_limit;            /* max active HH allocs */
+	u32		   hh_flows_overlimit; /* num of disallowed HH allocs */
+	u32		   hh_flows_total_cnt;          /* total admitted HHs */
+	u32		   hh_flows_current_cnt;        /* total current HHs  */
+	u32		   *hhf_arrays[HHF_ARRAYS_CNT]; /* HH filter F */
+	u32		   hhf_arrays_reset_timestamp;  /* last time hhf_arrays
+							 * was reset
+							 */
+	unsigned long	   *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits
+							     * of hhf_arrays
+							     */
+	/* Similar to the "new_flows" vs. "old_flows" concept in fq_codel DRR */
+	struct list_head   new_buckets; /* list of new buckets */
+	struct list_head   old_buckets; /* list of old buckets */
+
+	/* Configurable HHF parameters */
+	u32		   hhf_reset_timeout; /* interval to reset counter
+					       * arrays in filter F
+					       * (default 40ms)
+					       */
+	u32		   hhf_admit_bytes;   /* counter thresh to classify as
+					       * HH (default 128KB).
+					       * With these default values,
+					       * 128KB / 40ms = 25 Mbps
+					       * i.e., we expect to capture HHs
+					       * sending > 25 Mbps.
+					       */
+	u32		   hhf_evict_timeout; /* aging threshold to evict idle
+					       * HHs out of table T. This should
+					       * be large enough to avoid
+					       * reordering during HH eviction.
+					       * (default 1s)
+					       */
+	u32		   hhf_non_hh_weight; /* WDRR weight for non-HHs
+					       * (default 2,
+					       *  i.e., non-HH : HH = 2 : 1)
+					       */
+};
+
+static u32 hhf_time_stamp(void)
+{
+	return jiffies;
+}
+
+static unsigned int skb_hash(const struct hhf_sched_data *q,
+			     const struct sk_buff *skb)
+{
+	struct flow_keys keys;
+	unsigned int hash;
+
+	if (skb->sk && skb->sk->sk_hash)
+		return skb->sk->sk_hash;
+
+	skb_flow_dissect(skb, &keys);
+	hash = jhash_3words((__force u32)keys.dst,
+			    (__force u32)keys.src ^ keys.ip_proto,
+			    (__force u32)keys.ports, q->perturbation);
+	return hash;
+}
+
+/* Looks up a heavy-hitter flow in a chaining list of table T. */
+static struct hh_flow_state *seek_list(const u32 hash,
+				       struct list_head *head,
+				       struct hhf_sched_data *q)
+{
+	struct hh_flow_state *flow, *next;
+	u32 now = hhf_time_stamp();
+
+	if (list_empty(head))
+		return NULL;
+
+	list_for_each_entry_safe(flow, next, head, flowchain) {
+		u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;
+
+		if (hhf_time_before(prev, now)) {
+			/* Delete expired heavy-hitters, but preserve one entry
+			 * to avoid kzalloc() when next time this slot is hit.
+			 */
+			if (list_is_last(&flow->flowchain, head))
+				return NULL;
+			list_del(&flow->flowchain);
+			kfree(flow);
+			q->hh_flows_current_cnt--;
+		} else if (flow->hash_id == hash) {
+			return flow;
+		}
+	}
+	return NULL;
+}
+
+/* Returns a flow state entry for a new heavy-hitter.  Either reuses an expired
+ * entry or dynamically alloc a new entry.
+ */
+static struct hh_flow_state *alloc_new_hh(struct list_head *head,
+					  struct hhf_sched_data *q)
+{
+	struct hh_flow_state *flow;
+	u32 now = hhf_time_stamp();
+
+	if (!list_empty(head)) {
+		/* Find an expired heavy-hitter flow entry. */
+		list_for_each_entry(flow, head, flowchain) {
+			u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;
+
+			if (hhf_time_before(prev, now))
+				return flow;
+		}
+	}
+
+	if (q->hh_flows_current_cnt >= q->hh_flows_limit) {
+		q->hh_flows_overlimit++;
+		return NULL;
+	}
+	/* Create new entry. */
+	flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC);
+	if (!flow)
+		return NULL;
+
+	q->hh_flows_current_cnt++;
+	INIT_LIST_HEAD(&flow->flowchain);
+	list_add_tail(&flow->flowchain, head);
+
+	return flow;
+}
+
+/* Assigns packets to WDRR buckets.  Implements a multi-stage filter to
+ * classify heavy-hitters.
+ */
+static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	u32 tmp_hash, hash;
+	u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos;
+	struct hh_flow_state *flow;
+	u32 pkt_len, min_hhf_val;
+	int i;
+	u32 prev;
+	u32 now = hhf_time_stamp();
+
+	/* Reset the HHF counter arrays if this is the right time. */
+	prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout;
+	if (hhf_time_before(prev, now)) {
+		for (i = 0; i < HHF_ARRAYS_CNT; i++)
+			bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN);
+		q->hhf_arrays_reset_timestamp = now;
+	}
+
+	/* Get hashed flow-id of the skb. */
+	hash = skb_hash(q, skb);
+
+	/* Check if this packet belongs to an already established HH flow. */
+	flow_pos = hash & HHF_BIT_MASK;
+	flow = seek_list(hash, &q->hh_flows[flow_pos], q);
+	if (flow) { /* found its HH flow */
+		flow->hit_timestamp = now;
+		return WDRR_BUCKET_FOR_HH;
+	}
+
+	/* Now pass the packet through the multi-stage filter. */
+	tmp_hash = hash;
+	xorsum = 0;
+	for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) {
+		/* Split the skb_hash into three 10-bit chunks. */
+		filter_pos[i] = tmp_hash & HHF_BIT_MASK;
+		xorsum ^= filter_pos[i];
+		tmp_hash >>= HHF_BIT_MASK_LEN;
+	}
+	/* The last chunk is computed as XOR sum of other chunks. */
+	filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash;
+
+	pkt_len = qdisc_pkt_len(skb);
+	min_hhf_val = ~0U;
+	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+		u32 val;
+
+		if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) {
+			q->hhf_arrays[i][filter_pos[i]] = 0;
+			__set_bit(filter_pos[i], q->hhf_valid_bits[i]);
+		}
+
+		val = q->hhf_arrays[i][filter_pos[i]] + pkt_len;
+		if (min_hhf_val > val)
+			min_hhf_val = val;
+	}
+
+	/* Found a new HH iff all counter values > HH admit threshold. */
+	if (min_hhf_val > q->hhf_admit_bytes) {
+		/* Just captured a new heavy-hitter. */
+		flow = alloc_new_hh(&q->hh_flows[flow_pos], q);
+		if (!flow) /* memory alloc problem */
+			return WDRR_BUCKET_FOR_NON_HH;
+		flow->hash_id = hash;
+		flow->hit_timestamp = now;
+		q->hh_flows_total_cnt++;
+
+		/* By returning without updating counters in q->hhf_arrays,
+		 * we implicitly implement "shielding" (see Optimization O1).
+		 */
+		return WDRR_BUCKET_FOR_HH;
+	}
+
+	/* Conservative update of HHF arrays (see Optimization O2). */
+	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+		if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val)
+			q->hhf_arrays[i][filter_pos[i]] = min_hhf_val;
+	}
+	return WDRR_BUCKET_FOR_NON_HH;
+}
+
+/* Removes one skb from head of bucket. */
+static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket)
+{
+	struct sk_buff *skb = bucket->head;
+
+	bucket->head = skb->next;
+	skb->next = NULL;
+	return skb;
+}
+
+/* Tail-adds skb to bucket. */
+static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb)
+{
+	if (bucket->head == NULL)
+		bucket->head = skb;
+	else
+		bucket->tail->next = skb;
+	bucket->tail = skb;
+	skb->next = NULL;
+}
+
+static unsigned int hhf_drop(struct Qdisc *sch)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	struct wdrr_bucket *bucket;
+
+	/* Always try to drop from heavy-hitters first. */
+	bucket = &q->buckets[WDRR_BUCKET_FOR_HH];
+	if (!bucket->head)
+		bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH];
+
+	if (bucket->head) {
+		struct sk_buff *skb = dequeue_head(bucket);
+
+		sch->q.qlen--;
+		sch->qstats.drops++;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
+		kfree_skb(skb);
+	}
+
+	/* Return id of the bucket from which the packet was dropped. */
+	return bucket - q->buckets;
+}
+
+static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	enum wdrr_bucket_idx idx;
+	struct wdrr_bucket *bucket;
+
+	idx = hhf_classify(skb, sch);
+
+	bucket = &q->buckets[idx];
+	bucket_add(bucket, skb);
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+
+	if (list_empty(&bucket->bucketchain)) {
+		unsigned int weight;
+
+		/* The logic of new_buckets vs. old_buckets is the same as
+		 * new_flows vs. old_flows in the implementation of fq_codel,
+		 * i.e., short bursts of non-HHs should have strict priority.
+		 */
+		if (idx == WDRR_BUCKET_FOR_HH) {
+			/* Always move heavy-hitters to old bucket. */
+			weight = 1;
+			list_add_tail(&bucket->bucketchain, &q->old_buckets);
+		} else {
+			weight = q->hhf_non_hh_weight;
+			list_add_tail(&bucket->bucketchain, &q->new_buckets);
+		}
+		bucket->deficit = weight * q->quantum;
+	}
+	if (++sch->q.qlen < sch->limit)
+		return NET_XMIT_SUCCESS;
+
+	q->drop_overlimit++;
+	/* Return Congestion Notification only if we dropped a packet from this
+	 * bucket.
+	 */
+	if (hhf_drop(sch) == idx)
+		return NET_XMIT_CN;
+
+	/* As we dropped a packet, better let upper stack know this. */
+	qdisc_tree_decrease_qlen(sch, 1);
+	return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *hhf_dequeue(struct Qdisc *sch)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb = NULL;
+	struct wdrr_bucket *bucket;
+	struct list_head *head;
+
+begin:
+	head = &q->new_buckets;
+	if (list_empty(head)) {
+		head = &q->old_buckets;
+		if (list_empty(head))
+			return NULL;
+	}
+	bucket = list_first_entry(head, struct wdrr_bucket, bucketchain);
+
+	if (bucket->deficit <= 0) {
+		int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ?
+			      1 : q->hhf_non_hh_weight;
+
+		bucket->deficit += weight * q->quantum;
+		list_move_tail(&bucket->bucketchain, &q->old_buckets);
+		goto begin;
+	}
+
+	if (bucket->head) {
+		skb = dequeue_head(bucket);
+		sch->q.qlen--;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
+	}
+
+	if (!skb) {
+		/* Force a pass through old_buckets to prevent starvation. */
+		if ((head == &q->new_buckets) && !list_empty(&q->old_buckets))
+			list_move_tail(&bucket->bucketchain, &q->old_buckets);
+		else
+			list_del_init(&bucket->bucketchain);
+		goto begin;
+	}
+	qdisc_bstats_update(sch, skb);
+	bucket->deficit -= qdisc_pkt_len(skb);
+
+	return skb;
+}
+
+static void hhf_reset(struct Qdisc *sch)
+{
+	struct sk_buff *skb;
+
+	while ((skb = hhf_dequeue(sch)) != NULL)
+		kfree_skb(skb);
+}
+
+static void *hhf_zalloc(size_t sz)
+{
+	void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
+
+	if (!ptr)
+		ptr = vzalloc(sz);
+
+	return ptr;
+}
+
+static void hhf_free(void *addr)
+{
+	if (addr) {
+		if (is_vmalloc_addr(addr))
+			vfree(addr);
+		else
+			kfree(addr);
+	}
+}
+
+static void hhf_destroy(struct Qdisc *sch)
+{
+	int i;
+	struct hhf_sched_data *q = qdisc_priv(sch);
+
+	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+		hhf_free(q->hhf_arrays[i]);
+		hhf_free(q->hhf_valid_bits[i]);
+	}
+
+	for (i = 0; i < HH_FLOWS_CNT; i++) {
+		struct hh_flow_state *flow, *next;
+		struct list_head *head = &q->hh_flows[i];
+
+		if (list_empty(head))
+			continue;
+		list_for_each_entry_safe(flow, next, head, flowchain) {
+			list_del(&flow->flowchain);
+			kfree(flow);
+		}
+	}
+	hhf_free(q->hh_flows);
+}
+
+static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
+	[TCA_HHF_BACKLOG_LIMIT]	 = { .type = NLA_U32 },
+	[TCA_HHF_QUANTUM]	 = { .type = NLA_U32 },
+	[TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 },
+	[TCA_HHF_RESET_TIMEOUT]	 = { .type = NLA_U32 },
+	[TCA_HHF_ADMIT_BYTES]	 = { .type = NLA_U32 },
+	[TCA_HHF_EVICT_TIMEOUT]	 = { .type = NLA_U32 },
+	[TCA_HHF_NON_HH_WEIGHT]	 = { .type = NLA_U32 },
+};
+
+static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_HHF_MAX + 1];
+	unsigned int qlen;
+	int err;
+	u64 non_hh_quantum;
+	u32 new_quantum = q->quantum;
+	u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight;
+
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy);
+	if (err < 0)
+		return err;
+
+	sch_tree_lock(sch);
+
+	if (tb[TCA_HHF_BACKLOG_LIMIT])
+		sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
+
+	if (tb[TCA_HHF_QUANTUM])
+		new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]);
+
+	if (tb[TCA_HHF_NON_HH_WEIGHT])
+		new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]);
+
+	non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
+	if (non_hh_quantum > INT_MAX)
+		return -EINVAL;
+	q->quantum = new_quantum;
+	q->hhf_non_hh_weight = new_hhf_non_hh_weight;
+
+	if (tb[TCA_HHF_HH_FLOWS_LIMIT])
+		q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);
+
+	if (tb[TCA_HHF_RESET_TIMEOUT]) {
+		u32 ms = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);
+
+		q->hhf_reset_timeout = msecs_to_jiffies(ms);
+	}
+
+	if (tb[TCA_HHF_ADMIT_BYTES])
+		q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);
+
+	if (tb[TCA_HHF_EVICT_TIMEOUT]) {
+		u32 ms = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);
+
+		q->hhf_evict_timeout = msecs_to_jiffies(ms);
+	}
+
+	qlen = sch->q.qlen;
+	while (sch->q.qlen > sch->limit) {
+		struct sk_buff *skb = hhf_dequeue(sch);
+
+		kfree_skb(skb);
+	}
+	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	sch->limit = 1000;
+	q->quantum = psched_mtu(qdisc_dev(sch));
+	q->perturbation = net_random();
+	INIT_LIST_HEAD(&q->new_buckets);
+	INIT_LIST_HEAD(&q->old_buckets);
+
+	/* Configurable HHF parameters */
+	q->hhf_reset_timeout = HZ / 25; /* 40  ms */
+	q->hhf_admit_bytes = 131072;    /* 128 KB */
+	q->hhf_evict_timeout = HZ;      /* 1  sec */
+	q->hhf_non_hh_weight = 2;
+
+	if (opt) {
+		int err = hhf_change(sch, opt);
+
+		if (err)
+			return err;
+	}
+
+	if (!q->hh_flows) {
+		/* Initialize heavy-hitter flow table. */
+		q->hh_flows = hhf_zalloc(HH_FLOWS_CNT *
+					 sizeof(struct list_head));
+		if (!q->hh_flows)
+			return -ENOMEM;
+		for (i = 0; i < HH_FLOWS_CNT; i++)
+			INIT_LIST_HEAD(&q->hh_flows[i]);
+
+		/* Cap max active HHs at twice len of hh_flows table. */
+		q->hh_flows_limit = 2 * HH_FLOWS_CNT;
+		q->hh_flows_overlimit = 0;
+		q->hh_flows_total_cnt = 0;
+		q->hh_flows_current_cnt = 0;
+
+		/* Initialize heavy-hitter filter arrays. */
+		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+			q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN *
+						      sizeof(u32));
+			if (!q->hhf_arrays[i]) {
+				hhf_destroy(sch);
+				return -ENOMEM;
+			}
+		}
+		q->hhf_arrays_reset_timestamp = hhf_time_stamp();
+
+		/* Initialize valid bits of heavy-hitter filter arrays. */
+		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+			q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN /
+							  BITS_PER_BYTE);
+			if (!q->hhf_valid_bits[i]) {
+				hhf_destroy(sch);
+				return -ENOMEM;
+			}
+		}
+
+		/* Initialize Weighted DRR buckets. */
+		for (i = 0; i < WDRR_BUCKET_CNT; i++) {
+			struct wdrr_bucket *bucket = q->buckets + i;
+
+			INIT_LIST_HEAD(&bucket->bucketchain);
+		}
+	}
+
+	return 0;
+}
+
+static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
+	    nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
+	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
+	    nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
+			jiffies_to_msecs(q->hhf_reset_timeout)) ||
+	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
+	    nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
+			jiffies_to_msecs(q->hhf_evict_timeout)) ||
+	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, opts);
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+}
+
+static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	struct tc_hhf_xstats st = {
+		.drop_overlimit = q->drop_overlimit,
+		.hh_overlimit	= q->hh_flows_overlimit,
+		.hh_tot_count	= q->hh_flows_total_cnt,
+		.hh_cur_count	= q->hh_flows_current_cnt,
+	};
+
+	return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
+	.id		=	"hhf",
+	.priv_size	=	sizeof(struct hhf_sched_data),
+
+	.enqueue	=	hhf_enqueue,
+	.dequeue	=	hhf_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.drop		=	hhf_drop,
+	.init		=	hhf_init,
+	.reset		=	hhf_reset,
+	.destroy	=	hhf_destroy,
+	.change		=	hhf_change,
+	.dump		=	hhf_dump,
+	.dump_stats	=	hhf_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init hhf_module_init(void)
+{
+	return register_qdisc(&hhf_qdisc_ops);
+}
+
+static void __exit hhf_module_exit(void)
+{
+	unregister_qdisc(&hhf_qdisc_ops);
+}
+
+module_init(hhf_module_init)
+module_exit(hhf_module_exit)
+MODULE_AUTHOR("Terry Lam");
+MODULE_AUTHOR("Nandita Dukkipati");
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 830c64f25539..0db5a6eae87f 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -712,7 +712,7 @@ static s64 htb_do_events(struct htb_sched *q, const int level,
 
 	/* too much load - let's continue after a break for scheduling */
 	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
-		pr_warning("htb: too many events!\n");
+		pr_warn("htb: too many events!\n");
 		q->warned |= HTB_WARN_TOOMANYEVENTS;
 	}
 
@@ -1276,9 +1276,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 	struct Qdisc *new_q = NULL;
 	int last_child = 0;
 
-	// TODO: why don't allow to delete subtree ? references ? does
-	// tc subsys quarantee us that in htb_destroy it holds no class
-	// refs so that we can remove children safely there ?
+	/* TODO: why don't allow to delete subtree ? references ? does
+	 * tc subsys guarantee us that in htb_destroy it holds no class
+	 * refs so that we can remove children safely there ?
+	 */
 	if (cl->children || cl->filter_cnt)
 		return -EBUSY;
 
@@ -1471,21 +1472,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		sch_tree_lock(sch);
 	}
 
+	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+
+	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+
+	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
+	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
+
 	/* it used to be a nasty bug here, we have to check that node
 	 * is really leaf before changing cl->un.leaf !
 	 */
 	if (!cl->level) {
-		cl->quantum = hopt->rate.rate / q->rate2quantum;
+		u64 quantum = cl->rate.rate_bytes_ps;
+
+		do_div(quantum, q->rate2quantum);
+		cl->quantum = min_t(u64, quantum, INT_MAX);
+
 		if (!hopt->quantum && cl->quantum < 1000) {
-			pr_warning(
-			       "HTB: quantum of class %X is small. Consider r2q change.\n",
-			       cl->common.classid);
+			pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n",
+				cl->common.classid);
 			cl->quantum = 1000;
 		}
 		if (!hopt->quantum && cl->quantum > 200000) {
-			pr_warning(
-			       "HTB: quantum of class %X is big. Consider r2q change.\n",
-			       cl->common.classid);
+			pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n",
+				cl->common.classid);
 			cl->quantum = 200000;
 		}
 		if (hopt->quantum)
@@ -1494,13 +1504,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			cl->prio = TC_HTB_NUMPRIO - 1;
 	}
 
-	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
-
-	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
-
-	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
-	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
-
 	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
 	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index f3befd6b4781..090a4e3ecd0d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -88,7 +88,7 @@ struct netem_sched_data {
 	u32 duplicate;
 	u32 reorder;
 	u32 corrupt;
-	u32 rate;
+	u64 rate;
 	s32 packet_overhead;
 	u32 cell_size;
 	u32 cell_size_reciprocal;
@@ -495,7 +495,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				now = netem_skb_cb(last)->time_to_send;
 			}
 
-			delay += packet_len_2_sched_time(skb->len, q);
+			delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
 		}
 
 		cb->time_to_send = now + delay;
@@ -782,6 +782,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
+	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
 };
 
 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -852,6 +853,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_NETEM_RATE])
 		get_rate(sch, tb[TCA_NETEM_RATE]);
 
+	if (tb[TCA_NETEM_RATE64])
+		q->rate = max_t(u64, q->rate,
+				nla_get_u64(tb[TCA_NETEM_RATE64]));
+
 	if (tb[TCA_NETEM_ECN])
 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
 
@@ -974,7 +979,13 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
 		goto nla_put_failure;
 
-	rate.rate = q->rate;
+	if (q->rate >= (1ULL << 32)) {
+		if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate))
+			goto nla_put_failure;
+		rate.rate = ~0U;
+	} else {
+		rate.rate = q->rate;
+	}
 	rate.packet_overhead = q->packet_overhead;
 	rate.cell_size = q->cell_size;
 	rate.cell_overhead = q->cell_overhead;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a6090051c5db..fbba5b0ec121 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -118,6 +118,32 @@ struct tbf_sched_data {
 };
 
 
+/* Time to Length, convert time in ns to length in bytes
+ * to determinate how many bytes can be sent in given time.
+ */
+static u64 psched_ns_t2l(const struct psched_ratecfg *r,
+			 u64 time_in_ns)
+{
+	/* The formula is :
+	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
+	 */
+	u64 len = time_in_ns * r->rate_bytes_ps;
+
+	do_div(len, NSEC_PER_SEC);
+
+	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
+		do_div(len, 53);
+		len = len * 48;
+	}
+
+	if (len > r->overhead)
+		len -= r->overhead;
+	else
+		len = 0;
+
+	return len;
+}
+
 /*
  * Return length of individual segments of a gso packet,
  * including all headers (MAC, IP, TCP/UDP)
@@ -281,6 +307,8 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
 	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 	[TCA_TBF_RATE64]	= { .type = NLA_U64 },
 	[TCA_TBF_PRATE64]	= { .type = NLA_U64 },
+	[TCA_TBF_BURST] = { .type = NLA_U32 },
+	[TCA_TBF_PBURST] = { .type = NLA_U32 },
 };
 
 static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
@@ -289,10 +317,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_TBF_MAX + 1];
 	struct tc_tbf_qopt *qopt;
-	struct qdisc_rate_table *rtab = NULL;
-	struct qdisc_rate_table *ptab = NULL;
 	struct Qdisc *child = NULL;
-	int max_size, n;
+	struct psched_ratecfg rate;
+	struct psched_ratecfg peak;
+	u64 max_size;
+	s64 buffer, mtu;
 	u64 rate64 = 0, prate64 = 0;
 
 	err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
@@ -304,38 +333,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 		goto done;
 
 	qopt = nla_data(tb[TCA_TBF_PARMS]);
-	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
-	if (rtab == NULL)
-		goto done;
+	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
+		qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
+					      tb[TCA_TBF_RTAB]));
 
-	if (qopt->peakrate.rate) {
-		if (qopt->peakrate.rate > qopt->rate.rate)
-			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
-		if (ptab == NULL)
-			goto done;
-	}
-
-	for (n = 0; n < 256; n++)
-		if (rtab->data[n] > qopt->buffer)
-			break;
-	max_size = (n << qopt->rate.cell_log) - 1;
-	if (ptab) {
-		int size;
-
-		for (n = 0; n < 256; n++)
-			if (ptab->data[n] > qopt->mtu)
-				break;
-		size = (n << qopt->peakrate.cell_log) - 1;
-		if (size < max_size)
-			max_size = size;
-	}
-	if (max_size < 0)
-		goto done;
-
-	if (max_size < psched_mtu(qdisc_dev(sch)))
-		pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n",
-				    max_size, qdisc_dev(sch)->name,
-				    psched_mtu(qdisc_dev(sch)));
+	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
+			qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
+						      tb[TCA_TBF_PTAB]));
 
 	if (q->qdisc != &noop_qdisc) {
 		err = fifo_set_limit(q->qdisc, qopt->limit);
@@ -349,6 +353,50 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 		}
 	}
 
+	buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
+	mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
+
+	if (tb[TCA_TBF_RATE64])
+		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
+	psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
+
+	if (tb[TCA_TBF_BURST]) {
+		max_size = nla_get_u32(tb[TCA_TBF_BURST]);
+		buffer = psched_l2t_ns(&rate, max_size);
+	} else {
+		max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
+	}
+
+	if (qopt->peakrate.rate) {
+		if (tb[TCA_TBF_PRATE64])
+			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
+		psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
+		if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
+			pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
+					peak.rate_bytes_ps, rate.rate_bytes_ps);
+			err = -EINVAL;
+			goto done;
+		}
+
+		if (tb[TCA_TBF_PBURST]) {
+			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
+			max_size = min_t(u32, max_size, pburst);
+			mtu = psched_l2t_ns(&peak, pburst);
+		} else {
+			max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
+		}
+	}
+
+	if (max_size < psched_mtu(qdisc_dev(sch)))
+		pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
+				    max_size, qdisc_dev(sch)->name,
+				    psched_mtu(qdisc_dev(sch)));
+
+	if (!max_size) {
+		err = -EINVAL;
+		goto done;
+	}
+
 	sch_tree_lock(sch);
 	if (child) {
 		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
@@ -356,19 +404,21 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 		q->qdisc = child;
 	}
 	q->limit = qopt->limit;
-	q->mtu = PSCHED_TICKS2NS(qopt->mtu);
+	if (tb[TCA_TBF_PBURST])
+		q->mtu = mtu;
+	else
+		q->mtu = PSCHED_TICKS2NS(qopt->mtu);
 	q->max_size = max_size;
-	q->buffer = PSCHED_TICKS2NS(qopt->buffer);
+	if (tb[TCA_TBF_BURST])
+		q->buffer = buffer;
+	else
+		q->buffer = PSCHED_TICKS2NS(qopt->buffer);
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
 
-	if (tb[TCA_TBF_RATE64])
-		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
-	psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64);
-	if (ptab) {
-		if (tb[TCA_TBF_PRATE64])
-			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
-		psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64);
+	memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
+	if (qopt->peakrate.rate) {
+		memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
 		q->peak_present = true;
 	} else {
 		q->peak_present = false;
@@ -377,10 +427,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 	sch_tree_unlock(sch);
 	err = 0;
 done:
-	if (rtab)
-		qdisc_put_rtab(rtab);
-	if (ptab)
-		qdisc_put_rtab(ptab);
 	return err;
 }