summary | refs | log | tree | commit | diff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r-- net/sched/Kconfig 9
-rw-r--r-- net/sched/Makefile 1
-rw-r--r-- net/sched/act_api.c 255
-rw-r--r-- net/sched/act_csum.c 13
-rw-r--r-- net/sched/act_gact.c 13
-rw-r--r-- net/sched/act_ipt.c 21
-rw-r--r-- net/sched/act_mirred.c 18
-rw-r--r-- net/sched/act_nat.c 12
-rw-r--r-- net/sched/act_pedit.c 12
-rw-r--r-- net/sched/act_police.c 69
-rw-r--r-- net/sched/act_simple.c 20
-rw-r--r-- net/sched/act_skbedit.c 13
-rw-r--r-- net/sched/cls_api.c 117
-rw-r--r-- net/sched/cls_basic.c 13
-rw-r--r-- net/sched/cls_bpf.c 13
-rw-r--r-- net/sched/cls_cgroup.c 14
-rw-r--r-- net/sched/cls_flow.c 15
-rw-r--r-- net/sched/cls_fw.c 13
-rw-r--r-- net/sched/cls_route.c 13
-rw-r--r-- net/sched/cls_rsvp.h 13
-rw-r--r-- net/sched/cls_tcindex.c 17
-rw-r--r-- net/sched/cls_u32.c 13
-rw-r--r-- net/sched/em_meta.c 2
-rw-r--r-- net/sched/sch_api.c 7
-rw-r--r-- net/sched/sch_cbq.c 7
-rw-r--r-- net/sched/sch_dsmark.c 37
-rw-r--r-- net/sched/sch_fq.c 36
-rw-r--r-- net/sched/sch_generic.c 2
-rw-r--r-- net/sched/sch_gred.c 4
-rw-r--r-- net/sched/sch_hhf.c 745
-rw-r--r-- net/sched/sch_htb.c 39
-rw-r--r-- net/sched/sch_netem.c 17
-rw-r--r-- net/sched/sch_tbf.c 140
33 files changed, 1226 insertions, 507 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f711a471d0b7..d3d7a0a66e28 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -286,6 +286,15 @@ config NET_SCH_FQ
If unsure, say N.
+config NET_SCH_HHF
+ tristate "Heavy-Hitter Filter (HHF)"
+ help
+ Say Y here if you want to use the Heavy-Hitter Filter (HHF)
+ packet scheduling algorithm.
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_hhf.
+
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 35fa47a494ab..3442e5fbc4d7 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
+obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 4adbce8f8314..6f103fd76c17 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -29,25 +29,16 @@
void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
{
- unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
- struct tcf_common **p1p;
-
- for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
- if (*p1p == p) {
- write_lock_bh(hinfo->lock);
- *p1p = p->tcfc_next;
- write_unlock_bh(hinfo->lock);
- gen_kill_estimator(&p->tcfc_bstats,
- &p->tcfc_rate_est);
- /*
- * gen_estimator est_timer() might access p->tcfc_lock
- * or bstats, wait a RCU grace period before freeing p
- */
- kfree_rcu(p, tcfc_rcu);
- return;
- }
- }
- WARN_ON(1);
+ spin_lock_bh(&hinfo->lock);
+ hlist_del(&p->tcfc_head);
+ spin_unlock_bh(&hinfo->lock);
+ gen_kill_estimator(&p->tcfc_bstats,
+ &p->tcfc_rate_est);
+ /*
+ * gen_estimator est_timer() might access p->tcfc_lock
+ * or bstats, wait a RCU grace period before freeing p
+ */
+ kfree_rcu(p, tcfc_rcu);
}
EXPORT_SYMBOL(tcf_hash_destroy);
@@ -73,18 +64,19 @@ EXPORT_SYMBOL(tcf_hash_release);
static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
struct tc_action *a, struct tcf_hashinfo *hinfo)
{
+ struct hlist_head *head;
struct tcf_common *p;
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct nlattr *nest;
- read_lock_bh(hinfo->lock);
+ spin_lock_bh(&hinfo->lock);
s_i = cb->args[0];
for (i = 0; i < (hinfo->hmask + 1); i++) {
- p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+ head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
- for (; p; p = p->tcfc_next) {
+ hlist_for_each_entry_rcu(p, head, tcfc_head) {
index++;
if (index < s_i)
continue;
@@ -107,7 +99,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
}
}
done:
- read_unlock_bh(hinfo->lock);
+ spin_unlock_bh(&hinfo->lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
@@ -120,7 +112,9 @@ nla_put_failure:
static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
struct tcf_hashinfo *hinfo)
{
- struct tcf_common *p, *s_p;
+ struct hlist_head *head;
+ struct hlist_node *n;
+ struct tcf_common *p;
struct nlattr *nest;
int i = 0, n_i = 0;
@@ -130,14 +124,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
goto nla_put_failure;
for (i = 0; i < (hinfo->hmask + 1); i++) {
- p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
-
- while (p != NULL) {
- s_p = p->tcfc_next;
+ head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
+ hlist_for_each_entry_safe(p, n, head, tcfc_head) {
if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
module_put(a->ops->owner);
n_i++;
- p = s_p;
}
}
if (nla_put_u32(skb, TCA_FCNT, n_i))
@@ -150,8 +141,8 @@ nla_put_failure:
return -EINVAL;
}
-int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
- int type, struct tc_action *a)
+static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+ int type, struct tc_action *a)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
@@ -164,19 +155,18 @@ int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
return -EINVAL;
}
}
-EXPORT_SYMBOL(tcf_generic_walker);
struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
{
- struct tcf_common *p;
+ struct tcf_common *p = NULL;
+ struct hlist_head *head;
- read_lock_bh(hinfo->lock);
- for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
- p = p->tcfc_next) {
+ spin_lock_bh(&hinfo->lock);
+ head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
+ hlist_for_each_entry_rcu(p, head, tcfc_head)
if (p->tcfc_index == index)
break;
- }
- read_unlock_bh(hinfo->lock);
+ spin_unlock_bh(&hinfo->lock);
return p;
}
@@ -196,7 +186,7 @@ u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo)
}
EXPORT_SYMBOL(tcf_hash_new_index);
-int tcf_hash_search(struct tc_action *a, u32 index)
+static int tcf_hash_search(struct tc_action *a, u32 index)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = tcf_hash_lookup(index, hinfo);
@@ -207,7 +197,6 @@ int tcf_hash_search(struct tc_action *a, u32 index)
}
return 0;
}
-EXPORT_SYMBOL(tcf_hash_search);
struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
struct tcf_hashinfo *hinfo)
@@ -236,6 +225,7 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
p->tcfc_bindcnt = 1;
spin_lock_init(&p->tcfc_lock);
+ INIT_HLIST_NODE(&p->tcfc_head);
p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
@@ -257,19 +247,18 @@ void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
{
unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
- write_lock_bh(hinfo->lock);
- p->tcfc_next = hinfo->htab[h];
- hinfo->htab[h] = p;
- write_unlock_bh(hinfo->lock);
+ spin_lock_bh(&hinfo->lock);
+ hlist_add_head(&p->tcfc_head, &hinfo->htab[h]);
+ spin_unlock_bh(&hinfo->lock);
}
EXPORT_SYMBOL(tcf_hash_insert);
-static struct tc_action_ops *act_base = NULL;
+static LIST_HEAD(act_base);
static DEFINE_RWLOCK(act_mod_lock);
int tcf_register_action(struct tc_action_ops *act)
{
- struct tc_action_ops *a, **ap;
+ struct tc_action_ops *a;
/* Must supply act, dump, cleanup and init */
if (!act->act || !act->dump || !act->cleanup || !act->init)
@@ -282,14 +271,13 @@ int tcf_register_action(struct tc_action_ops *act)
act->walk = tcf_generic_walker;
write_lock(&act_mod_lock);
- for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) {
+ list_for_each_entry(a, &act_base, head) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
return -EEXIST;
}
}
- act->next = NULL;
- *ap = act;
+ list_add_tail(&act->head, &act_base);
write_unlock(&act_mod_lock);
return 0;
}
@@ -297,17 +285,16 @@ EXPORT_SYMBOL(tcf_register_action);
int tcf_unregister_action(struct tc_action_ops *act)
{
- struct tc_action_ops *a, **ap;
+ struct tc_action_ops *a;
int err = -ENOENT;
write_lock(&act_mod_lock);
- for (ap = &act_base; (a = *ap) != NULL; ap = &a->next)
- if (a == act)
+ list_for_each_entry(a, &act_base, head) {
+ if (a == act) {
+ list_del(&act->head);
+ err = 0;
break;
- if (a) {
- *ap = a->next;
- a->next = NULL;
- err = 0;
+ }
}
write_unlock(&act_mod_lock);
return err;
@@ -317,69 +304,42 @@ EXPORT_SYMBOL(tcf_unregister_action);
/* lookup by name */
static struct tc_action_ops *tc_lookup_action_n(char *kind)
{
- struct tc_action_ops *a = NULL;
+ struct tc_action_ops *a, *res = NULL;
if (kind) {
read_lock(&act_mod_lock);
- for (a = act_base; a; a = a->next) {
+ list_for_each_entry(a, &act_base, head) {
if (strcmp(kind, a->kind) == 0) {
- if (!try_module_get(a->owner)) {
- read_unlock(&act_mod_lock);
- return NULL;
- }
+ if (try_module_get(a->owner))
+ res = a;
break;
}
}
read_unlock(&act_mod_lock);
}
- return a;
+ return res;
}
/* lookup by nlattr */
static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
{
- struct tc_action_ops *a = NULL;
+ struct tc_action_ops *a, *res = NULL;
if (kind) {
read_lock(&act_mod_lock);
- for (a = act_base; a; a = a->next) {
+ list_for_each_entry(a, &act_base, head) {
if (nla_strcmp(kind, a->kind) == 0) {
- if (!try_module_get(a->owner)) {
- read_unlock(&act_mod_lock);
- return NULL;
- }
+ if (try_module_get(a->owner))
+ res = a;
break;
}
}
read_unlock(&act_mod_lock);
}
- return a;
+ return res;
}
-#if 0
-/* lookup by id */
-static struct tc_action_ops *tc_lookup_action_id(u32 type)
-{
- struct tc_action_ops *a = NULL;
-
- if (type) {
- read_lock(&act_mod_lock);
- for (a = act_base; a; a = a->next) {
- if (a->type == type) {
- if (!try_module_get(a->owner)) {
- read_unlock(&act_mod_lock);
- return NULL;
- }
- break;
- }
- }
- read_unlock(&act_mod_lock);
- }
- return a;
-}
-#endif
-
-int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act,
+int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
struct tcf_result *res)
{
const struct tc_action *a;
@@ -390,7 +350,7 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act,
ret = TC_ACT_OK;
goto exec_done;
}
- while ((a = act) != NULL) {
+ list_for_each_entry(a, actions, list) {
repeat:
if (a->ops) {
ret = a->ops->act(skb, a, res);
@@ -404,27 +364,26 @@ repeat:
if (ret != TC_ACT_PIPE)
goto exec_done;
}
- act = a->next;
}
exec_done:
return ret;
}
EXPORT_SYMBOL(tcf_action_exec);
-void tcf_action_destroy(struct tc_action *act, int bind)
+void tcf_action_destroy(struct list_head *actions, int bind)
{
- struct tc_action *a;
+ struct tc_action *a, *tmp;
- for (a = act; a; a = act) {
+ list_for_each_entry_safe(a, tmp, actions, list) {
if (a->ops) {
if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
module_put(a->ops->owner);
- act = act->next;
+ list_del(&a->list);
kfree(a);
} else {
/*FIXME: Remove later - catch insertion bugs*/
WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n");
- act = act->next;
+ list_del(&a->list);
kfree(a);
}
}
@@ -470,14 +429,13 @@ nla_put_failure:
EXPORT_SYMBOL(tcf_action_dump_1);
int
-tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
+tcf_action_dump(struct sk_buff *skb, struct list_head *actions, int bind, int ref)
{
struct tc_action *a;
int err = -EINVAL;
struct nlattr *nest;
- while ((a = act) != NULL) {
- act = a->next;
+ list_for_each_entry(a, actions, list) {
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
goto nla_put_failure;
@@ -552,6 +510,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
if (a == NULL)
goto err_mod;
+ INIT_LIST_HEAD(&a->list);
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
@@ -578,37 +537,33 @@ err_out:
return ERR_PTR(err);
}
-struct tc_action *tcf_action_init(struct net *net, struct nlattr *nla,
+int tcf_action_init(struct net *net, struct nlattr *nla,
struct nlattr *est, char *name, int ovr,
- int bind)
+ int bind, struct list_head *actions)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
- struct tc_action *head = NULL, *act, *act_prev = NULL;
+ struct tc_action *act;
int err;
int i;
err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
if (err < 0)
- return ERR_PTR(err);
+ return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(net, tb[i], est, name, ovr, bind);
- if (IS_ERR(act))
+ if (IS_ERR(act)) {
+ err = PTR_ERR(act);
goto err;
+ }
act->order = i;
-
- if (head == NULL)
- head = act;
- else
- act_prev->next = act;
- act_prev = act;
+ list_add_tail(&act->list, actions);
}
- return head;
+ return 0;
err:
- if (head != NULL)
- tcf_action_destroy(head, bind);
- return act;
+ tcf_action_destroy(actions, bind);
+ return err;
}
int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
@@ -637,10 +592,6 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;
- if (a->ops != NULL && a->ops->get_stats != NULL)
- if (a->ops->get_stats(skb, a) < 0)
- goto errout;
-
if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &h->tcf_bstats,
&h->tcf_rate_est) < 0 ||
@@ -657,7 +608,7 @@ errout:
}
static int
-tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq,
+tca_get_fill(struct sk_buff *skb, struct list_head *actions, u32 portid, u32 seq,
u16 flags, int event, int bind, int ref)
{
struct tcamsg *t;
@@ -677,7 +628,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq,
if (nest == NULL)
goto out_nlmsg_trim;
- if (tcf_action_dump(skb, a, bind, ref) < 0)
+ if (tcf_action_dump(skb, actions, bind, ref) < 0)
goto out_nlmsg_trim;
nla_nest_end(skb, nest);
@@ -692,14 +643,14 @@ out_nlmsg_trim:
static int
act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
- struct tc_action *a, int event)
+ struct list_head *actions, int event)
{
struct sk_buff *skb;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+ if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -730,6 +681,7 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
if (a == NULL)
goto err_out;
+ INIT_LIST_HEAD(&a->list);
err = -EINVAL;
a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
if (a->ops == NULL)
@@ -749,12 +701,12 @@ err_out:
return ERR_PTR(err);
}
-static void cleanup_a(struct tc_action *act)
+static void cleanup_a(struct list_head *actions)
{
- struct tc_action *a;
+ struct tc_action *a, *tmp;
- for (a = act; a; a = act) {
- act = a->next;
+ list_for_each_entry_safe(a, tmp, actions, list) {
+ list_del(&a->list);
kfree(a);
}
}
@@ -769,6 +721,7 @@ static struct tc_action *create_a(int i)
return NULL;
}
act->order = i;
+ INIT_LIST_HEAD(&act->list);
return act;
}
@@ -856,7 +809,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
{
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
- struct tc_action *head = NULL, *act, *act_prev = NULL;
+ struct tc_action *act;
+ LIST_HEAD(actions);
ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
if (ret < 0)
@@ -876,16 +830,11 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
goto err;
}
act->order = i;
-
- if (head == NULL)
- head = act;
- else
- act_prev->next = act;
- act_prev = act;
+ list_add_tail(&act->list, &actions);
}
if (event == RTM_GETACTION)
- ret = act_get_notify(net, portid, n, head, event);
+ ret = act_get_notify(net, portid, n, &actions, event);
else { /* delete */
struct sk_buff *skb;
@@ -895,7 +844,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
goto err;
}
- if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event,
+ if (tca_get_fill(skb, &actions, portid, n->nlmsg_seq, 0, event,
0, 1) <= 0) {
kfree_skb(skb);
ret = -EINVAL;
@@ -903,7 +852,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
}
/* now do the delete */
- tcf_action_destroy(head, 0);
+ tcf_action_destroy(&actions, 0);
ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (ret > 0)
@@ -911,11 +860,11 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
return ret;
}
err:
- cleanup_a(head);
+ cleanup_a(&actions);
return ret;
}
-static int tcf_add_notify(struct net *net, struct tc_action *a,
+static int tcf_add_notify(struct net *net, struct list_head *actions,
u32 portid, u32 seq, int event, u16 flags)
{
struct tcamsg *t;
@@ -943,7 +892,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
if (nest == NULL)
goto out_kfree_skb;
- if (tcf_action_dump(skb, a, 0, 0) < 0)
+ if (tcf_action_dump(skb, actions, 0, 0) < 0)
goto out_kfree_skb;
nla_nest_end(skb, nest);
@@ -967,26 +916,18 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
u32 portid, int ovr)
{
int ret = 0;
- struct tc_action *act;
- struct tc_action *a;
+ LIST_HEAD(actions);
u32 seq = n->nlmsg_seq;
- act = tcf_action_init(net, nla, NULL, NULL, ovr, 0);
- if (act == NULL)
- goto done;
- if (IS_ERR(act)) {
- ret = PTR_ERR(act);
+ ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
+ if (ret)
goto done;
- }
/* dump then free all the actions after update; inserted policy
* stays intact
*/
- ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags);
- for (a = act; a; a = act) {
- act = a->next;
- kfree(a);
- }
+ ret = tcf_add_notify(net, &actions, portid, seq, RTM_NEWACTION, n->nlmsg_flags);
+ cleanup_a(&actions);
done:
return ret;
}
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 5c5edf56adbd..9cc6717c5f19 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,15 +37,8 @@
#include <net/tc_act/tc_csum.h>
#define CSUM_TAB_MASK 15
-static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
static u32 csum_idx_gen;
-static DEFINE_RWLOCK(csum_lock);
-
-static struct tcf_hashinfo csum_hash_info = {
- .htab = tcf_csum_ht,
- .hmask = CSUM_TAB_MASK,
- .lock = &csum_lock,
-};
+static struct tcf_hashinfo csum_hash_info;
static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
@@ -593,6 +586,10 @@ MODULE_LICENSE("GPL");
static int __init csum_init_module(void)
{
+ int err = tcf_hashinfo_init(&csum_hash_info, CSUM_TAB_MASK);
+ if (err)
+ return err;
+
return tcf_register_action(&act_csum_ops);
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 5645a4d32abd..dea927343bf4 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -24,15 +24,8 @@
#include <net/tc_act/tc_gact.h>
#define GACT_TAB_MASK 15
-static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1];
static u32 gact_idx_gen;
-static DEFINE_RWLOCK(gact_lock);
-
-static struct tcf_hashinfo gact_hash_info = {
- .htab = tcf_gact_ht,
- .hmask = GACT_TAB_MASK,
- .lock = &gact_lock,
-};
+static struct tcf_hashinfo gact_hash_info;
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
@@ -215,6 +208,9 @@ MODULE_LICENSE("GPL");
static int __init gact_init_module(void)
{
+ int err = tcf_hashinfo_init(&gact_hash_info, GACT_TAB_MASK);
+ if (err)
+ return err;
#ifdef CONFIG_GACT_PROB
pr_info("GACT probability on\n");
#else
@@ -226,6 +222,7 @@ static int __init gact_init_module(void)
static void __exit gact_cleanup_module(void)
{
tcf_unregister_action(&act_gact_ops);
+ tcf_hashinfo_destroy(&gact_hash_info);
}
module_init(gact_init_module);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 882a89762f77..e13ecbbfe8c4 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -29,15 +29,8 @@
#define IPT_TAB_MASK 15
-static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1];
static u32 ipt_idx_gen;
-static DEFINE_RWLOCK(ipt_lock);
-
-static struct tcf_hashinfo ipt_hash_info = {
- .htab = tcf_ipt_ht,
- .hmask = IPT_TAB_MASK,
- .lock = &ipt_lock,
-};
+static struct tcf_hashinfo ipt_hash_info;
static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
{
@@ -320,7 +313,11 @@ MODULE_ALIAS("act_xt");
static int __init ipt_init_module(void)
{
- int ret1, ret2;
+ int ret1, ret2, err;
+ err = tcf_hashinfo_init(&ipt_hash_info, IPT_TAB_MASK);
+ if (err)
+ return err;
+
ret1 = tcf_register_action(&act_xt_ops);
if (ret1 < 0)
printk("Failed to load xt action\n");
@@ -328,9 +325,10 @@ static int __init ipt_init_module(void)
if (ret2 < 0)
printk("Failed to load ipt action\n");
- if (ret1 < 0 && ret2 < 0)
+ if (ret1 < 0 && ret2 < 0) {
+ tcf_hashinfo_destroy(&ipt_hash_info);
return ret1;
- else
+ } else
return 0;
}
@@ -338,6 +336,7 @@ static void __exit ipt_cleanup_module(void)
{
tcf_unregister_action(&act_xt_ops);
tcf_unregister_action(&act_ipt_ops);
+ tcf_hashinfo_destroy(&ipt_hash_info);
}
module_init(ipt_init_module);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 252378121ce7..9dbb8cd64cb0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,16 +30,9 @@
#include <linux/if_arp.h>
#define MIRRED_TAB_MASK 7
-static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
static u32 mirred_idx_gen;
-static DEFINE_RWLOCK(mirred_lock);
static LIST_HEAD(mirred_list);
-
-static struct tcf_hashinfo mirred_hash_info = {
- .htab = tcf_mirred_ht,
- .hmask = MIRRED_TAB_MASK,
- .lock = &mirred_lock,
-};
+static struct tcf_hashinfo mirred_hash_info;
static int tcf_mirred_release(struct tcf_mirred *m, int bind)
{
@@ -261,7 +254,6 @@ static struct notifier_block mirred_device_notifier = {
.notifier_call = mirred_device_event,
};
-
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
.hinfo = &mirred_hash_info,
@@ -284,14 +276,20 @@ static int __init mirred_init_module(void)
if (err)
return err;
+ err = tcf_hashinfo_init(&mirred_hash_info, MIRRED_TAB_MASK);
+ if (err) {
+ unregister_netdevice_notifier(&mirred_device_notifier);
+ return err;
+ }
pr_info("Mirror/redirect action on\n");
return tcf_register_action(&act_mirred_ops);
}
static void __exit mirred_cleanup_module(void)
{
- unregister_netdevice_notifier(&mirred_device_notifier);
tcf_unregister_action(&act_mirred_ops);
+ tcf_hashinfo_destroy(&mirred_hash_info);
+ unregister_netdevice_notifier(&mirred_device_notifier);
}
module_init(mirred_init_module);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 6a15ace00241..921fea43fca2 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -30,15 +30,9 @@
#define NAT_TAB_MASK 15
-static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1];
static u32 nat_idx_gen;
-static DEFINE_RWLOCK(nat_lock);
-static struct tcf_hashinfo nat_hash_info = {
- .htab = tcf_nat_ht,
- .hmask = NAT_TAB_MASK,
- .lock = &nat_lock,
-};
+static struct tcf_hashinfo nat_hash_info;
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
[TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
@@ -316,12 +310,16 @@ MODULE_LICENSE("GPL");
static int __init nat_init_module(void)
{
+ int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK);
+ if (err)
+ return err;
return tcf_register_action(&act_nat_ops);
}
static void __exit nat_cleanup_module(void)
{
tcf_unregister_action(&act_nat_ops);
+ tcf_hashinfo_destroy(&nat_hash_info);
}
module_init(nat_init_module);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 03b67674169c..e2520e90a10d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -24,15 +24,9 @@
#include <net/tc_act/tc_pedit.h>
#define PEDIT_TAB_MASK 15
-static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1];
static u32 pedit_idx_gen;
-static DEFINE_RWLOCK(pedit_lock);
-static struct tcf_hashinfo pedit_hash_info = {
- .htab = tcf_pedit_ht,
- .hmask = PEDIT_TAB_MASK,
- .lock = &pedit_lock,
-};
+static struct tcf_hashinfo pedit_hash_info;
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
[TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
@@ -252,11 +246,15 @@ MODULE_LICENSE("GPL");
static int __init pedit_init_module(void)
{
+ int err = tcf_hashinfo_init(&pedit_hash_info, PEDIT_TAB_MASK);
+ if (err)
+ return err;
return tcf_register_action(&act_pedit_ops);
}
static void __exit pedit_cleanup_module(void)
{
tcf_unregister_action(&act_pedit_ops);
+ tcf_hashinfo_destroy(&pedit_hash_info); /* free the table only after the ops are unregistered */
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 16a62c36928a..819a9a4d1987 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -41,15 +41,8 @@ struct tcf_police {
container_of(pc, struct tcf_police, common)
#define POL_TAB_MASK 15
-static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
static u32 police_idx_gen;
-static DEFINE_RWLOCK(police_lock);
-
-static struct tcf_hashinfo police_hash_info = {
- .htab = tcf_police_ht,
- .hmask = POL_TAB_MASK,
- .lock = &police_lock,
-};
+static struct tcf_hashinfo police_hash_info;
/* old policer structure from before tc actions */
struct tc_police_compat {
@@ -67,18 +60,19 @@ struct tc_police_compat {
static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
int type, struct tc_action *a)
{
+ struct hlist_head *head;
struct tcf_common *p;
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct nlattr *nest;
- read_lock_bh(&police_lock);
+ spin_lock_bh(&police_hash_info.lock);
s_i = cb->args[0];
for (i = 0; i < (POL_TAB_MASK + 1); i++) {
- p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
+ head = &police_hash_info.htab[tcf_hash(i, POL_TAB_MASK)];
- for (; p; p = p->tcfc_next) {
+ hlist_for_each_entry_rcu(p, head, tcfc_head) {
index++;
if (index < s_i)
continue;
@@ -101,7 +95,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
}
}
done:
- read_unlock_bh(&police_lock);
+ spin_unlock_bh(&police_hash_info.lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
@@ -113,25 +107,16 @@ nla_put_failure:
static void tcf_police_destroy(struct tcf_police *p)
{
- unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
- struct tcf_common **p1p;
-
- for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
- if (*p1p == &p->common) {
- write_lock_bh(&police_lock);
- *p1p = p->tcf_next;
- write_unlock_bh(&police_lock);
- gen_kill_estimator(&p->tcf_bstats,
- &p->tcf_rate_est);
- /*
- * gen_estimator est_timer() might access p->tcf_lock
- * or bstats, wait a RCU grace period before freeing p
- */
- kfree_rcu(p, tcf_rcu);
- return;
- }
- }
- WARN_ON(1);
+ spin_lock_bh(&police_hash_info.lock);
+ hlist_del(&p->tcf_head);
+ spin_unlock_bh(&police_hash_info.lock);
+ gen_kill_estimator(&p->tcf_bstats,
+ &p->tcf_rate_est);
+ /*
+ * gen_estimator est_timer() might access p->tcf_lock
+ * or bstats, wait a RCU grace period before freeing p
+ */
+ kfree_rcu(p, tcf_rcu);
}
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -266,10 +251,9 @@ override:
police->tcf_index = parm->index ? parm->index :
tcf_hash_new_index(&police_idx_gen, &police_hash_info);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
- write_lock_bh(&police_lock);
- police->tcf_next = tcf_police_ht[h];
- tcf_police_ht[h] = &police->common;
- write_unlock_bh(&police_lock);
+ spin_lock_bh(&police_hash_info.lock);
+ hlist_add_head(&police->tcf_head, &police_hash_info.htab[h]);
+ spin_unlock_bh(&police_hash_info.lock);
a->priv = police;
return ret;
@@ -277,10 +261,8 @@ override:
failure_unlock:
spin_unlock_bh(&police->tcf_lock);
failure:
- if (P_tab)
- qdisc_put_rtab(P_tab);
- if (R_tab)
- qdisc_put_rtab(R_tab);
+ qdisc_put_rtab(P_tab);
+ qdisc_put_rtab(R_tab);
if (ret == ACT_P_CREATED)
kfree(police);
return err;
@@ -414,12 +396,19 @@ static struct tc_action_ops act_police_ops = {
static int __init
police_init_module(void)
{
- return tcf_register_action(&act_police_ops);
+ int err = tcf_hashinfo_init(&police_hash_info, POL_TAB_MASK);
+ if (err)
+ return err;
+ err = tcf_register_action(&act_police_ops);
+ if (err)
+ tcf_hashinfo_destroy(&police_hash_info);
+ return err;
}
static void __exit
police_cleanup_module(void)
{
tcf_unregister_action(&act_police_ops);
+ tcf_hashinfo_destroy(&police_hash_info); /* free the table only after the ops are unregistered */
}
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 31157d3e729c..81aebc162e5c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -25,15 +25,8 @@
#include <net/tc_act/tc_defact.h>
#define SIMP_TAB_MASK 7
-static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1];
static u32 simp_idx_gen;
-static DEFINE_RWLOCK(simp_lock);
-
-static struct tcf_hashinfo simp_hash_info = {
- .htab = tcf_simp_ht,
- .hmask = SIMP_TAB_MASK,
- .lock = &simp_lock,
-};
+static struct tcf_hashinfo simp_hash_info;
#define SIMP_MAX_DATA 32
static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
@@ -209,14 +202,23 @@ MODULE_LICENSE("GPL");
static int __init simp_init_module(void)
{
- int ret = tcf_register_action(&act_simp_ops);
+ int err, ret;
+ err = tcf_hashinfo_init(&simp_hash_info, SIMP_TAB_MASK);
+ if (err)
+ return err;
+
+ ret = tcf_register_action(&act_simp_ops);
if (!ret)
pr_info("Simple TC action Loaded\n");
+ else
+ tcf_hashinfo_destroy(&simp_hash_info);
+
return ret;
}
static void __exit simp_cleanup_module(void)
{
tcf_unregister_action(&act_simp_ops);
+ tcf_hashinfo_destroy(&simp_hash_info); /* free the table only after the ops are unregistered */
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cf20add1c3ff..aa0a4c056f31 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -28,15 +28,8 @@
#include <net/tc_act/tc_skbedit.h>
#define SKBEDIT_TAB_MASK 15
-static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
static u32 skbedit_idx_gen;
-static DEFINE_RWLOCK(skbedit_lock);
-
-static struct tcf_hashinfo skbedit_hash_info = {
- .htab = tcf_skbedit_ht,
- .hmask = SKBEDIT_TAB_MASK,
- .lock = &skbedit_lock,
-};
+static struct tcf_hashinfo skbedit_hash_info;
static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
@@ -210,11 +203,15 @@ MODULE_LICENSE("GPL");
static int __init skbedit_init_module(void)
{
+ int err = tcf_hashinfo_init(&skbedit_hash_info, SKBEDIT_TAB_MASK);
+ if (err)
+ return err;
return tcf_register_action(&act_skbedit_ops);
}
static void __exit skbedit_cleanup_module(void)
{
tcf_unregister_action(&act_skbedit_ops);
+ tcf_hashinfo_destroy(&skbedit_hash_info); /* free the table only after the ops are unregistered */
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8e118af90973..12e882ef596b 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,8 +31,7 @@
#include <net/pkt_cls.h>
/* The list of all installed classifier types */
-
-static struct tcf_proto_ops *tcf_proto_base __read_mostly;
+static LIST_HEAD(tcf_proto_base);
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
@@ -41,36 +40,35 @@ static DEFINE_RWLOCK(cls_mod_lock);
static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
{
- const struct tcf_proto_ops *t = NULL;
+ const struct tcf_proto_ops *t, *res = NULL;
if (kind) {
read_lock(&cls_mod_lock);
- for (t = tcf_proto_base; t; t = t->next) {
+ list_for_each_entry(t, &tcf_proto_base, head) {
if (nla_strcmp(kind, t->kind) == 0) {
- if (!try_module_get(t->owner))
- t = NULL;
+ if (try_module_get(t->owner))
+ res = t;
break;
}
}
read_unlock(&cls_mod_lock);
}
- return t;
+ return res;
}
/* Register(unregister) new classifier type */
int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
- struct tcf_proto_ops *t, **tp;
+ struct tcf_proto_ops *t;
int rc = -EEXIST;
write_lock(&cls_mod_lock);
- for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
+ list_for_each_entry(t, &tcf_proto_base, head)
if (!strcmp(ops->kind, t->kind))
goto out;
- ops->next = NULL;
- *tp = ops;
+ list_add_tail(&ops->head, &tcf_proto_base);
rc = 0;
out:
write_unlock(&cls_mod_lock);
@@ -80,19 +78,17 @@ EXPORT_SYMBOL(register_tcf_proto_ops);
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
- struct tcf_proto_ops *t, **tp;
+ struct tcf_proto_ops *t;
int rc = -ENOENT;
write_lock(&cls_mod_lock);
- for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
- if (t == ops)
+ list_for_each_entry(t, &tcf_proto_base, head) {
+ if (t == ops) {
+ list_del(&t->head);
+ rc = 0;
break;
-
- if (!t)
- goto out;
- *tp = t->next;
- rc = 0;
-out:
+ }
+ }
write_unlock(&cls_mod_lock);
return rc;
}
@@ -500,46 +496,41 @@ out:
void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- if (exts->action) {
- tcf_action_destroy(exts->action, TCA_ACT_UNBIND);
- exts->action = NULL;
- }
+ tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
+ INIT_LIST_HEAD(&exts->actions);
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
- struct nlattr *rate_tlv, struct tcf_exts *exts,
- const struct tcf_ext_map *map)
+ struct nlattr *rate_tlv, struct tcf_exts *exts)
{
- memset(exts, 0, sizeof(*exts));
-
#ifdef CONFIG_NET_CLS_ACT
{
struct tc_action *act;
- if (map->police && tb[map->police]) {
- act = tcf_action_init_1(net, tb[map->police], rate_tlv,
+ INIT_LIST_HEAD(&exts->actions);
+ if (exts->police && tb[exts->police]) {
+ act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
"police", TCA_ACT_NOREPLACE,
TCA_ACT_BIND);
if (IS_ERR(act))
return PTR_ERR(act);
- act->type = TCA_OLD_COMPAT;
- exts->action = act;
- } else if (map->action && tb[map->action]) {
- act = tcf_action_init(net, tb[map->action], rate_tlv,
+ act->type = exts->type = TCA_OLD_COMPAT;
+ list_add(&act->list, &exts->actions);
+ } else if (exts->action && tb[exts->action]) {
+ int err;
+ err = tcf_action_init(net, tb[exts->action], rate_tlv,
NULL, TCA_ACT_NOREPLACE,
- TCA_ACT_BIND);
- if (IS_ERR(act))
- return PTR_ERR(act);
-
- exts->action = act;
+ TCA_ACT_BIND, &exts->actions);
+ if (err)
+ return err;
}
}
#else
- if ((map->action && tb[map->action]) ||
- (map->police && tb[map->police]))
+ if ((exts->action && tb[exts->action]) ||
+ (exts->police && tb[exts->police]))
return -EOPNOTSUPP;
#endif
@@ -551,43 +542,44 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
- if (src->action) {
- struct tc_action *act;
+ if (!list_empty(&src->actions)) {
+ LIST_HEAD(tmp);
tcf_tree_lock(tp);
- act = dst->action;
- dst->action = src->action;
+ list_splice_init(&dst->actions, &tmp);
+ list_splice(&src->actions, &dst->actions);
tcf_tree_unlock(tp);
- if (act)
- tcf_action_destroy(act, TCA_ACT_UNBIND);
+ tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
}
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
-int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
- const struct tcf_ext_map *map)
+#define tcf_exts_first_act(exts) /* list must be non-empty */ \
+ list_first_entry(&(exts)->actions, struct tc_action, list)
+
+int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- if (map->action && exts->action) {
+ if (exts->action && !list_empty(&exts->actions)) {
/*
* again for backward compatible mode - we want
* to work with both old and new modes of entering
* tc data even if iproute2 was newer - jhs
*/
struct nlattr *nest;
-
- if (exts->action->type != TCA_OLD_COMPAT) {
- nest = nla_nest_start(skb, map->action);
+ if (exts->type != TCA_OLD_COMPAT) {
+ nest = nla_nest_start(skb, exts->action);
if (nest == NULL)
goto nla_put_failure;
- if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
+ if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- } else if (map->police) {
- nest = nla_nest_start(skb, map->police);
+ } else if (exts->police) {
+ struct tc_action *act = tcf_exts_first_act(exts);
+ nest = nla_nest_start(skb, exts->police);
if (nest == NULL)
goto nla_put_failure;
- if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
+ if (tcf_action_dump_old(skb, act, 0, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
}
@@ -600,17 +592,14 @@ nla_put_failure: __attribute__ ((unused))
EXPORT_SYMBOL(tcf_exts_dump);
-int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
- const struct tcf_ext_map *map)
+int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- if (exts->action)
- if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
- goto nla_put_failure;
+ if (!list_empty(&exts->actions) && /* no actions: nothing to dump */
+ tcf_action_copy_stats(skb, tcf_exts_first_act(exts), 1) < 0)
+ return -1;
#endif
return 0;
-nla_put_failure: __attribute__ ((unused))
- return -1;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 636d9131d870..b6552035d1f4 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -34,11 +34,6 @@ struct basic_filter {
struct list_head link;
};
-static const struct tcf_ext_map basic_ext_map = {
- .action = TCA_BASIC_ACT,
- .police = TCA_BASIC_POLICE
-};
-
static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
@@ -141,7 +136,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct tcf_exts e;
struct tcf_ematch_tree t;
- err = tcf_exts_validate(net, tp, tb, est, &e, &basic_ext_map);
+ tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &e);
if (err < 0)
return err;
@@ -191,6 +187,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
goto errout;
+ tcf_exts_init(&f->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
err = -EINVAL;
if (handle)
f->handle = handle;
@@ -263,13 +260,13 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
+ if (tcf_exts_dump(skb, &f->exts) < 0 ||
tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index d7c72be121f3..00a5a585e5f1 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -46,11 +46,6 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};
-static const struct tcf_ext_map bpf_ext_map = {
- .action = TCA_BPF_ACT,
- .police = TCA_BPF_POLICE,
-};
-
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
@@ -174,7 +169,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
return -EINVAL;
- ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map);
+ tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_validate(net, tp, tb, est, &exts);
if (ret < 0)
return ret;
@@ -271,6 +267,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (prog == NULL)
return -ENOBUFS;
+ tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
if (handle == 0)
prog->handle = cls_bpf_grab_new_handle(tp, head);
else
@@ -325,12 +322,12 @@ static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh,
memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
- if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0)
+ if (tcf_exts_dump(skb, &prog->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 838fa40abad1..8349fcdc50f3 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -76,11 +76,6 @@ static int cls_cgroup_init(struct tcf_proto *tp)
return 0;
}
-static const struct tcf_ext_map cgroup_ext_map = {
- .action = TCA_CGROUP_ACT,
- .police = TCA_CGROUP_POLICE,
-};
-
static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
@@ -107,6 +102,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (head == NULL)
return -ENOBUFS;
+ tcf_exts_init(&head->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
head->handle = handle;
tcf_tree_lock(tp);
@@ -122,8 +118,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e,
- &cgroup_ext_map);
+ tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
if (err < 0)
return err;
@@ -181,13 +177,13 @@ static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
if (nest == NULL)
goto nla_put_failure;
- if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
+ if (tcf_exts_dump(skb, &head->exts) < 0 ||
tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &head->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 7881e2fccbc2..dfd18a5c3e81 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -56,11 +56,6 @@ struct flow_filter {
u32 hashrnd;
};
-static const struct tcf_ext_map flow_ext_map = {
- .action = TCA_FLOW_ACT,
- .police = TCA_FLOW_POLICE,
-};
-
static inline u32 addr_fold(void *addr)
{
unsigned long a = (unsigned long)addr;
@@ -220,7 +215,7 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
static u32 flow_get_rxhash(struct sk_buff *skb)
{
- return skb_get_rxhash(skb);
+ return skb_get_hash(skb);
}
static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
@@ -397,7 +392,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
return -EOPNOTSUPP;
}
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
+ tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
if (err < 0)
return err;
@@ -455,6 +451,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
f->handle = handle;
f->mask = ~0U;
+ tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
get_random_bytes(&f->hashrnd, 4);
f->perturb_timer.function = flow_perturbation;
@@ -608,7 +605,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
+ if (tcf_exts_dump(skb, &f->exts) < 0)
goto nla_put_failure;
#ifdef CONFIG_NET_EMATCH
if (f->ematches.hdr.nmatches &&
@@ -617,7 +614,7 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh,
#endif
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 9b97172db84a..3f9cece13807 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -46,11 +46,6 @@ struct fw_filter {
struct tcf_exts exts;
};
-static const struct tcf_ext_map fw_ext_map = {
- .action = TCA_FW_ACT,
- .police = TCA_FW_POLICE
-};
-
static inline int fw_hash(u32 handle)
{
if (HTSIZE == 4096)
@@ -200,7 +195,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
u32 mask;
int err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &fw_ext_map);
+ tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
if (err < 0)
return err;
@@ -280,6 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
return -ENOBUFS;
+ tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
f->id = handle;
err = fw_change_attrs(net, tp, f, tb, tca, base);
@@ -359,12 +356,12 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_FW_MASK, head->mask))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
+ if (tcf_exts_dump(skb, &f->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 37da567d833e..2473953a5948 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -59,11 +59,6 @@ struct route4_filter {
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
-static const struct tcf_ext_map route_ext_map = {
- .police = TCA_ROUTE4_POLICE,
- .action = TCA_ROUTE4_ACT
-};
-
static inline int route4_fastmap_hash(u32 id, int iif)
{
return id & 0xF;
@@ -347,7 +342,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_bucket *b;
struct tcf_exts e;
- err = tcf_exts_validate(net, tp, tb, est, &e, &route_ext_map);
+ tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &e);
if (err < 0)
return err;
@@ -481,6 +477,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
goto errout;
+ tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
err = route4_set_parms(net, tp, base, f, handle, head, tb,
tca[TCA_RATE], 1);
if (err < 0)
@@ -589,12 +586,12 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_ROUTE4_CLASSID, f->res.classid))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
+ if (tcf_exts_dump(skb, &f->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 252d8b05872e..4f25c2ac825b 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -116,11 +116,6 @@ static inline unsigned int hash_src(__be32 *src)
return h & 0xF;
}
-static struct tcf_ext_map rsvp_ext_map = {
- .police = TCA_RSVP_POLICE,
- .action = TCA_RSVP_ACT
-};
-
#define RSVP_APPLY_RESULT() \
{ \
int r = tcf_exts_exec(skb, &f->exts, res); \
@@ -440,7 +435,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
+ tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
if (err < 0)
return err;
@@ -471,6 +467,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
goto errout2;
+ tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
h2 = 16;
if (tb[TCA_RSVP_SRC]) {
memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
@@ -633,12 +630,12 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
+ if (tcf_exts_dump(skb, &f->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index b86535a40169..ffad18791c93 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -50,11 +50,6 @@ struct tcindex_data {
int fall_through; /* 0: only classify if explicit match */
};
-static const struct tcf_ext_map tcindex_ext_map = {
- .police = TCA_TCINDEX_POLICE,
- .action = TCA_TCINDEX_ACT
-};
-
static inline int
tcindex_filter_is_set(struct tcindex_filter_result *r)
{
@@ -209,17 +204,21 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct tcindex_filter *f = NULL; /* make gcc behave */
struct tcf_exts e;
- err = tcf_exts_validate(net, tp, tb, est, &e, &tcindex_ext_map);
+ tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &e);
if (err < 0)
return err;
memcpy(&cp, p, sizeof(cp));
memset(&new_filter_result, 0, sizeof(new_filter_result));
+ tcf_exts_init(&new_filter_result.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (old_r)
memcpy(&cr, r, sizeof(cr));
- else
+ else {
memset(&cr, 0, sizeof(cr));
+ tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ }
if (tb[TCA_TCINDEX_HASH])
cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -468,11 +467,11 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
+ if (tcf_exts_dump(skb, &r->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &r->exts) < 0)
goto nla_put_failure;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 59e546c2ac98..20f2fb79c747 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -79,11 +79,6 @@ struct tc_u_common {
u32 hgenerator;
};
-static const struct tcf_ext_map u32_ext_map = {
- .action = TCA_U32_ACT,
- .police = TCA_U32_POLICE
-};
-
static inline unsigned int u32_hash_fold(__be32 key,
const struct tc_u32_sel *sel,
u8 fshift)
@@ -496,7 +491,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
int err;
struct tcf_exts e;
- err = tcf_exts_validate(net, tp, tb, est, &e, &u32_ext_map);
+ tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &e);
if (err < 0)
return err;
@@ -646,6 +642,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n->ht_up = ht;
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
+ tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
#ifdef CONFIG_CLS_U32_MARK
if (tb[TCA_U32_MARK]) {
@@ -759,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
goto nla_put_failure;
#endif
- if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
+ if (tcf_exts_dump(skb, &n->exts) < 0)
goto nla_put_failure;
#ifdef CONFIG_NET_CLS_IND
@@ -778,7 +775,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
nla_nest_end(skb, nest);
if (TC_U32_KEY(n->handle))
- if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
+ if (tcf_exts_dump_stats(skb, &n->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 382519a5d7f9..9b8c0b0e60d7 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -222,7 +222,7 @@ META_COLLECTOR(int_maclen)
META_COLLECTOR(int_rxhash)
{
- dst->value = skb_get_rxhash(skb);
+ dst->value = skb_get_hash(skb);
}
/**************************************************************************
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 547b4a88ae2a..1313145e3b86 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -135,7 +135,7 @@ static DEFINE_RWLOCK(qdisc_mod_lock);
static struct Qdisc_ops *qdisc_base;
-/* Register/uregister queueing discipline */
+/* Register/unregister queueing discipline */
int register_qdisc(struct Qdisc_ops *qops)
{
@@ -273,8 +273,11 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
void qdisc_list_add(struct Qdisc *q)
{
+ struct Qdisc *root = qdisc_dev(q)->qdisc;
+
+ WARN_ON_ONCE(root == &noop_qdisc);
if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
- list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
+ list_add_tail(&q->list, &root->list);
}
EXPORT_SYMBOL(qdisc_list_add);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d5a8a4b2454f..2f80d01d42a6 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1060,8 +1060,8 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
}
if (cl->quantum <= 0 ||
cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
- pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
- cl->common.classid, cl->quantum);
+ pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+ cl->common.classid, cl->quantum);
cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
}
}
@@ -1783,8 +1783,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
if (err) {
- if (rtab)
- qdisc_put_rtab(rtab);
+ qdisc_put_rtab(rtab);
return err;
}
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0952fd2684e4..49d6ef338b55 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -57,8 +57,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
- sch, p, new, old);
+ pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n",
+ __func__, sch, p, new, old);
if (new == NULL) {
new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
@@ -85,8 +85,8 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
{
- pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
- sch, qdisc_priv(sch), classid);
+ pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
+ __func__, sch, qdisc_priv(sch), classid);
return TC_H_MIN(classid) + 1;
}
@@ -118,8 +118,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
int err = -EINVAL;
u8 mask = 0;
- pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
- "arg 0x%lx\n", sch, p, classid, parent, *arg);
+ pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
+ __func__, sch, p, classid, parent, *arg);
if (!dsmark_valid_index(p, *arg)) {
err = -ENOENT;
@@ -166,7 +166,8 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
struct dsmark_qdisc_data *p = qdisc_priv(sch);
int i;
- pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+ pr_debug("%s(sch %p,[qdisc %p],walker %p)\n",
+ __func__, sch, p, walker);
if (walker->stop)
return;
@@ -199,7 +200,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct dsmark_qdisc_data *p = qdisc_priv(sch);
int err;
- pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+ pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
if (p->set_tc_index) {
switch (skb->protocol) {
@@ -275,7 +276,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
struct sk_buff *skb;
u32 index;
- pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
skb = p->q->ops->dequeue(p->q);
if (skb == NULL)
@@ -303,8 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
* and don't need yet another qdisc as a bypass.
*/
if (p->mask[index] != 0xff || p->value[index])
- pr_warning("dsmark_dequeue: unsupported protocol %d\n",
- ntohs(skb->protocol));
+ pr_warn("%s: unsupported protocol %d\n",
+ __func__, ntohs(skb->protocol));
break;
}
@@ -315,7 +316,7 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
return p->q->ops->peek(p->q);
}
@@ -325,7 +326,7 @@ static unsigned int dsmark_drop(struct Qdisc *sch)
struct dsmark_qdisc_data *p = qdisc_priv(sch);
unsigned int len;
- pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
if (p->q->ops->drop == NULL)
return 0;
@@ -346,7 +347,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
u16 indices;
u8 *mask;
- pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+ pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
if (!opt)
goto errout;
@@ -384,7 +385,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (p->q == NULL)
p->q = &noop_qdisc;
- pr_debug("dsmark_init: qdisc %p\n", p->q);
+ pr_debug("%s: qdisc %p\n", __func__, p->q);
err = 0;
errout:
@@ -395,7 +396,7 @@ static void dsmark_reset(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
qdisc_reset(p->q);
sch->q.qlen = 0;
}
@@ -404,7 +405,7 @@ static void dsmark_destroy(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
+ pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
tcf_destroy_chain(&p->filter_list);
qdisc_destroy(p->q);
@@ -417,7 +418,7 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
struct dsmark_qdisc_data *p = qdisc_priv(sch);
struct nlattr *opts = NULL;
- pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
+ pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl);
if (!dsmark_valid_index(p, cl))
return -EINVAL;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 95d843961907..08ef7a42c0e4 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -47,6 +47,7 @@
#include <linux/rbtree.h>
#include <linux/hash.h>
#include <linux/prefetch.h>
+#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -225,7 +226,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
/* By forcing low order bit to 1, we make sure to not
* collide with a local flow (socket pointers are word aligned)
*/
- sk = (struct sock *)(skb_get_rxhash(skb) | 1L);
+ sk = (struct sock *)(skb_get_hash(skb) | 1L);
}
root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
@@ -578,15 +579,36 @@ static void fq_rehash(struct fq_sched_data *q,
q->stat_gc_flows += fcnt;
}
-static int fq_resize(struct fq_sched_data *q, u32 log)
+static void *fq_alloc_node(size_t sz, int node)
{
+ void *ptr;
+
+ ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node);
+ if (!ptr)
+ ptr = vmalloc_node(sz, node);
+ return ptr;
+}
+
+static void fq_free(void *addr)
+{
+ if (addr && is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ kfree(addr);
+}
+
+static int fq_resize(struct Qdisc *sch, u32 log)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
struct rb_root *array;
u32 idx;
if (q->fq_root && log == q->fq_trees_log)
return 0;
- array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL);
+ /* If XPS was set up, we can allocate memory on the right NUMA node */
+ array = fq_alloc_node(sizeof(struct rb_root) << log,
+ netdev_queue_numa_node_read(sch->dev_queue));
if (!array)
return -ENOMEM;
@@ -595,7 +617,7 @@ static int fq_resize(struct fq_sched_data *q, u32 log)
if (q->fq_root) {
fq_rehash(q, q->fq_root, q->fq_trees_log, array, log);
- kfree(q->fq_root);
+ fq_free(q->fq_root);
}
q->fq_root = array;
q->fq_trees_log = log;
@@ -676,7 +698,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
}
if (!err)
- err = fq_resize(q, fq_log);
+ err = fq_resize(sch, fq_log);
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_dequeue(sch);
@@ -697,7 +719,7 @@ static void fq_destroy(struct Qdisc *sch)
struct fq_sched_data *q = qdisc_priv(sch);
fq_reset(sch);
- kfree(q->fq_root);
+ fq_free(q->fq_root);
qdisc_watchdog_cancel(&q->watchdog);
}
@@ -723,7 +745,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
if (opt)
err = fq_change(sch, opt);
else
- err = fq_resize(q, q->fq_trees_log);
+ err = fq_resize(sch, q->fq_trees_log);
return err;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6a91d7d48ade..32bb942d2faa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -718,8 +718,8 @@ static void attach_default_qdiscs(struct net_device *dev)
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
if (qdisc) {
- qdisc->ops->attach(qdisc);
dev->qdisc = qdisc;
+ qdisc->ops->attach(qdisc);
}
}
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index d42234c0f13b..12cbc09157fc 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -370,8 +370,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
for (i = table->DPs; i < MAX_DPs; i++) {
if (table->tab[i]) {
- pr_warning("GRED: Warning: Destroying "
- "shadowed VQ 0x%x\n", i);
+ pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
+ i);
gred_destroy_vq(table->tab[i]);
table->tab[i] = NULL;
}
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
new file mode 100644
index 000000000000..cf7f614e841b
--- /dev/null
+++ b/net/sched/sch_hhf.c
@@ -0,0 +1,745 @@
+/* net/sched/sch_hhf.c Heavy-Hitter Filter (HHF)
+ *
+ * Copyright (C) 2013 Terry Lam <vtlam@google.com>
+ * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com>
+ */
+
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <net/flow_keys.h>
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+/* Heavy-Hitter Filter (HHF)
+ *
+ * Principles :
+ * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter
+ * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified
+ * as heavy-hitter, it is immediately switched to the heavy-hitter bucket.
+ * The buckets are dequeued by a Weighted Deficit Round Robin (WDRR) scheduler,
+ * in which the heavy-hitter bucket is served with less weight.
+ * In other words, non-heavy-hitters (e.g., short bursts of critical traffic)
+ * are isolated from heavy-hitters (e.g., persistent bulk traffic) and also have
+ * higher share of bandwidth.
+ *
+ * To capture heavy-hitters, we use the "multi-stage filter" algorithm in the
+ * following paper:
+ * [EV02] C. Estan and G. Varghese, "New Directions in Traffic Measurement and
+ * Accounting", in ACM SIGCOMM, 2002.
+ *
+ * Conceptually, a multi-stage filter comprises k independent hash functions
+ * and k counter arrays. Packets are indexed into k counter arrays by k hash
+ * functions, respectively. The counters are then increased by the packet sizes.
+ * Therefore,
+ * - For a heavy-hitter flow: *all* of its k array counters must be large.
+ * - For a non-heavy-hitter flow: some of its k array counters can be large
+ * due to hash collision with other small flows; however, with high
+ * probability, not *all* k counters are large.
+ *
+ * By the design of the multi-stage filter algorithm, the false negative rate
+ * (heavy-hitters getting away uncaptured) is zero. However, the algorithm is
+ * susceptible to false positives (non-heavy-hitters mistakenly classified as
+ * heavy-hitters).
+ * Therefore, we also implement the following optimizations to reduce false
+ * positives by avoiding unnecessary increment of the counter values:
+ * - Optimization O1: once a heavy-hitter is identified, its bytes are not
+ * accounted in the array counters. This technique is called "shielding"
+ * in Section 3.3.1 of [EV02].
+ * - Optimization O2: conservative update of counters
+ * (Section 3.3.2 of [EV02]),
+ * New counter value = max {old counter value,
+ * smallest counter value + packet bytes}
+ *
+ * Finally, we refresh the counters periodically since otherwise the counter
+ * values will keep accumulating.
+ *
+ * Once a flow is classified as heavy-hitter, we also save its per-flow state
+ * in an exact-matching flow table so that its subsequent packets can be
+ * dispatched to the heavy-hitter bucket accordingly.
+ *
+ *
+ * At a high level, this qdisc works as follows:
+ * Given a packet p:
+ * - If the flow-id of p (e.g., TCP 5-tuple) is already in the exact-matching
+ * heavy-hitter flow table, denoted table T, then send p to the heavy-hitter
+ * bucket.
+ * - Otherwise, forward p to the multi-stage filter, denoted filter F
+ * + If F decides that p belongs to a non-heavy-hitter flow, then send p
+ * to the non-heavy-hitter bucket.
+ * + Otherwise, if F decides that p belongs to a new heavy-hitter flow,
+ * then set up a new flow entry for the flow-id of p in the table T and
+ * send p to the heavy-hitter bucket.
+ *
+ * In this implementation:
+ * - T is a fixed-size hash-table with 1024 entries. Hash collision is
+ * resolved by linked-list chaining.
+ * - F has four counter arrays, each array containing 1024 32-bit counters.
+ * That means 4 * 1024 * 32 bits = 16KB of memory.
+ * - Since each array in F contains 1024 counters, 10 bits are sufficient to
+ * index into each array.
+ * Hence, instead of having four hash functions, we chop the 32-bit
+ * skb-hash into three 10-bit chunks, and the fourth index is computed
+ * as the XOR sum of those three chunks and the remaining 2 high bits.
+ * - We need to clear the counter arrays periodically; however, directly
+ * memsetting 16KB of memory can lead to cache eviction and unwanted delay.
+ * So by representing each counter by a valid bit, we only need to reset
+ * 4K of 1 bit (i.e. 512 bytes) instead of 16KB of memory.
+ * - The Deficit Round Robin engine is taken from fq_codel implementation
+ * (net/sched/sch_fq_codel.c). Note that wdrr_bucket corresponds to
+ * fq_codel_flow in fq_codel implementation.
+ *
+ */
+
+/* Non-configurable parameters.
+ *
+ * HHF_BIT_MASK is applied to the flow hash to index both table T
+ * (HH_FLOWS_CNT entries) and each counter array of filter F
+ * (HHF_ARRAYS_LEN entries), so its range must cover exactly those sizes.
+ */
+#define HH_FLOWS_CNT 1024 /* number of entries in exact-matching table T */
+#define HHF_ARRAYS_CNT 4 /* number of arrays in multi-stage filter F */
+#define HHF_ARRAYS_LEN 1024 /* number of counters in each array of F */
+#define HHF_BIT_MASK_LEN 10 /* masking 10 bits */
+#define HHF_BIT_MASK 0x3FF /* bitmask of 10 bits */
+
+#define WDRR_BUCKET_CNT 2 /* two buckets for Weighted DRR */
+/* Index into hhf_sched_data.buckets[]; also the value returned by the
+ * classifier and by hhf_drop().
+ */
+enum wdrr_bucket_idx {
+ WDRR_BUCKET_FOR_HH = 0, /* bucket id for heavy-hitters */
+ WDRR_BUCKET_FOR_NON_HH = 1 /* bucket id for non-heavy-hitters */
+};
+
+/* True if u32 timestamp a is earlier than u32 timestamp b. The comparison is
+ * done on the signed 32-bit difference, so it remains correct when the
+ * timestamps wrap around. typecheck() rejects arguments that are not u32.
+ */
+#define hhf_time_before(a, b) \
+ (typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0))
+
+/* Heavy-hitter per-flow state: one entry of the exact-matching table T.
+ * Entries are allocated by alloc_new_hh() and linked into the chain of the
+ * table slot selected by the low bits of hash_id.
+ */
+struct hh_flow_state {
+ u32 hash_id; /* hash of flow-id (e.g. TCP 5-tuple) */
+ u32 hit_timestamp; /* last time heavy-hitter was seen (hhf_time_stamp() units) */
+ struct list_head flowchain; /* chaining under hash collision */
+};
+
+/* Weighted Deficit Round Robin (WDRR) scheduler bucket.
+ * Packets are kept in a singly linked FIFO threaded through skb->next.
+ */
+struct wdrr_bucket {
+ struct sk_buff *head; /* first queued packet, NULL when bucket is empty */
+ struct sk_buff *tail; /* last queued packet; only meaningful if head != NULL */
+ struct list_head bucketchain; /* membership in new_buckets / old_buckets */
+ int deficit; /* remaining byte credit for the current DRR round */
+};
+
+/* Private per-qdisc state: the two WDRR buckets, the exact-matching
+ * heavy-hitter table T, the multi-stage filter F with its valid bits,
+ * statistics counters, and the configurable parameters.
+ */
+struct hhf_sched_data {
+ struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
+ u32 perturbation; /* hash perturbation */
+ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
+ u32 drop_overlimit; /* number of times max qdisc packet
+ * limit was hit
+ */
+ struct list_head *hh_flows; /* table T (currently active HHs) */
+ u32 hh_flows_limit; /* max active HH allocs */
+ u32 hh_flows_overlimit; /* num of disallowed HH allocs */
+ u32 hh_flows_total_cnt; /* total admitted HHs */
+ u32 hh_flows_current_cnt; /* total current HHs */
+ u32 *hhf_arrays[HHF_ARRAYS_CNT]; /* HH filter F */
+ u32 hhf_arrays_reset_timestamp; /* last time hhf_arrays
+ * was reset
+ */
+ unsigned long *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits
+ * of hhf_arrays
+ */
+ /* Similar to the "new_flows" vs. "old_flows" concept in fq_codel DRR */
+ struct list_head new_buckets; /* list of new buckets */
+ struct list_head old_buckets; /* list of old buckets */
+
+ /* Configurable HHF parameters (timeouts are stored in jiffies) */
+ u32 hhf_reset_timeout; /* interval to reset counter
+ * arrays in filter F
+ * (default 40ms)
+ */
+ u32 hhf_admit_bytes; /* counter thresh to classify as
+ * HH (default 128KB).
+ * With these default values,
+ * 128KB / 40ms = 25 Mbps
+ * i.e., we expect to capture HHs
+ * sending > 25 Mbps.
+ */
+ u32 hhf_evict_timeout; /* aging threshold to evict idle
+ * HHs out of table T. This should
+ * be large enough to avoid
+ * reordering during HH eviction.
+ * (default 1s)
+ */
+ u32 hhf_non_hh_weight; /* WDRR weight for non-HHs
+ * (default 2,
+ * i.e., non-HH : HH = 2 : 1)
+ */
+};
+
+/* Returns the current jiffies value truncated to 32 bits. */
+static u32 hhf_time_stamp(void)
+{
+	u32 now = jiffies;
+
+	return now;
+}
+
+/* Returns the flow hash of skb: the owning socket's hash when one is set,
+ * otherwise a perturbed jhash over the dissected flow keys.
+ */
+static unsigned int skb_hash(const struct hhf_sched_data *q,
+			     const struct sk_buff *skb)
+{
+	const struct sock *sk = skb->sk;
+	struct flow_keys keys;
+
+	if (!sk || !sk->sk_hash) {
+		/* No usable socket hash: derive one from the packet. */
+		skb_flow_dissect(skb, &keys);
+		return jhash_3words((__force u32)keys.dst,
+				    (__force u32)keys.src ^ keys.ip_proto,
+				    (__force u32)keys.ports, q->perturbation);
+	}
+
+	return sk->sk_hash;
+}
+
+/* Searches one chain of table T for a live heavy-hitter entry whose hash_id
+ * equals @hash. Expired entries met on the way are freed, except that an
+ * expired entry at the tail of the chain is kept (and ends the search) so it
+ * can be reused later without a new allocation.
+ *
+ * Returns the matching entry, or NULL if there is none.
+ */
+static struct hh_flow_state *seek_list(const u32 hash,
+				       struct list_head *head,
+				       struct hhf_sched_data *q)
+{
+	struct hh_flow_state *entry, *tmp;
+	u32 now = hhf_time_stamp();
+
+	list_for_each_entry_safe(entry, tmp, head, flowchain) {
+		u32 expiry = entry->hit_timestamp + q->hhf_evict_timeout;
+
+		if (!hhf_time_before(expiry, now)) {
+			/* Still alive: is it the flow we want? */
+			if (entry->hash_id == hash)
+				return entry;
+			continue;
+		}
+
+		/* Expired. Preserve the last entry of the chain to avoid
+		 * kzalloc() the next time this slot is hit.
+		 */
+		if (list_is_last(&entry->flowchain, head))
+			break;
+
+		list_del(&entry->flowchain);
+		kfree(entry);
+		q->hh_flows_current_cnt--;
+	}
+
+	return NULL;
+}
+
+/* Provides a flow state entry for a newly detected heavy-hitter in the chain
+ * @head. An expired entry already on the chain is recycled if there is one;
+ * otherwise a new entry is allocated and appended, subject to
+ * q->hh_flows_limit.
+ *
+ * Returns NULL when the limit is reached (counted in hh_flows_overlimit) or
+ * when the allocation fails.
+ */
+static struct hh_flow_state *alloc_new_hh(struct list_head *head,
+					  struct hhf_sched_data *q)
+{
+	struct hh_flow_state *entry;
+	u32 now = hhf_time_stamp();
+
+	/* First choice: reuse an expired heavy-hitter flow entry. */
+	list_for_each_entry(entry, head, flowchain) {
+		u32 expiry = entry->hit_timestamp + q->hhf_evict_timeout;
+
+		if (hhf_time_before(expiry, now))
+			return entry;
+	}
+
+	if (q->hh_flows_current_cnt >= q->hh_flows_limit) {
+		q->hh_flows_overlimit++;
+		return NULL;
+	}
+
+	/* Nothing to recycle: create a new entry. */
+	entry = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC);
+	if (entry) {
+		q->hh_flows_current_cnt++;
+		INIT_LIST_HEAD(&entry->flowchain);
+		list_add_tail(&entry->flowchain, head);
+	}
+
+	return entry;
+}
+
+/* Assigns packets to WDRR buckets. Implements a multi-stage filter to
+ * classify heavy-hitters.
+ *
+ * Returns WDRR_BUCKET_FOR_HH when the flow of skb is already in table T or
+ * has just been admitted to it, and WDRR_BUCKET_FOR_NON_HH otherwise
+ * (including when a new table entry could not be obtained).
+ */
+static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ u32 tmp_hash, hash;
+ u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos;
+ struct hh_flow_state *flow;
+ u32 pkt_len, min_hhf_val;
+ int i;
+ u32 prev;
+ u32 now = hhf_time_stamp();
+
+ /* Reset the HHF counter arrays if this is the right time.
+ * Only the valid bits are cleared; a counter is zeroed lazily below
+ * the first time it is touched after the reset.
+ */
+ prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout;
+ if (hhf_time_before(prev, now)) {
+ for (i = 0; i < HHF_ARRAYS_CNT; i++)
+ bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN);
+ q->hhf_arrays_reset_timestamp = now;
+ }
+
+ /* Get hashed flow-id of the skb. */
+ hash = skb_hash(q, skb);
+
+ /* Check if this packet belongs to an already established HH flow. */
+ flow_pos = hash & HHF_BIT_MASK;
+ flow = seek_list(hash, &q->hh_flows[flow_pos], q);
+ if (flow) { /* found its HH flow */
+ flow->hit_timestamp = now;
+ return WDRR_BUCKET_FOR_HH;
+ }
+
+ /* Now pass the packet through the multi-stage filter. */
+ tmp_hash = hash;
+ xorsum = 0;
+ for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) {
+ /* Split the skb_hash into three 10-bit chunks. */
+ filter_pos[i] = tmp_hash & HHF_BIT_MASK;
+ xorsum ^= filter_pos[i];
+ tmp_hash >>= HHF_BIT_MASK_LEN;
+ }
+ /* The last index is the XOR sum of the other chunks and of the hash
+ * bits left over after the shifts above.
+ */
+ filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash;
+
+ /* Compute, over all arrays, the smallest counter value this packet
+ * would produce (counter + packet length).
+ */
+ pkt_len = qdisc_pkt_len(skb);
+ min_hhf_val = ~0U;
+ for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+ u32 val;
+
+ /* Counter not touched since the last reset: zero it now. */
+ if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) {
+ q->hhf_arrays[i][filter_pos[i]] = 0;
+ __set_bit(filter_pos[i], q->hhf_valid_bits[i]);
+ }
+
+ val = q->hhf_arrays[i][filter_pos[i]] + pkt_len;
+ if (min_hhf_val > val)
+ min_hhf_val = val;
+ }
+
+ /* Found a new HH iff all counter values > HH admit threshold. */
+ if (min_hhf_val > q->hhf_admit_bytes) {
+ /* Just captured a new heavy-hitter. */
+ flow = alloc_new_hh(&q->hh_flows[flow_pos], q);
+ if (!flow) /* memory alloc problem or HH limit reached */
+ return WDRR_BUCKET_FOR_NON_HH;
+ flow->hash_id = hash;
+ flow->hit_timestamp = now;
+ q->hh_flows_total_cnt++;
+
+ /* By returning without updating counters in q->hhf_arrays,
+ * we implicitly implement "shielding" (see Optimization O1).
+ */
+ return WDRR_BUCKET_FOR_HH;
+ }
+
+ /* Conservative update of HHF arrays (see Optimization O2). */
+ for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+ if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val)
+ q->hhf_arrays[i][filter_pos[i]] = min_hhf_val;
+ }
+ return WDRR_BUCKET_FOR_NON_HH;
+}
+
+/* Detaches and returns the first skb of @bucket.
+ * The caller must ensure the bucket is not empty.
+ */
+static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket)
+{
+	struct sk_buff *first = bucket->head;
+	struct sk_buff *rest = first->next;
+
+	first->next = NULL;
+	bucket->head = rest;
+
+	return first;
+}
+
+/* Appends @skb at the tail of @bucket's packet list. */
+static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb)
+{
+	skb->next = NULL;
+
+	if (bucket->head)
+		bucket->tail->next = skb;
+	else
+		bucket->head = skb;
+
+	bucket->tail = skb;
+}
+
+/* Drops one packet from the head of a bucket, preferring the heavy-hitter
+ * bucket and falling back to the non-heavy-hitter bucket when it is empty.
+ *
+ * Returns the index of the bucket that was selected, even if both buckets
+ * were empty and nothing was dropped.
+ */
+static unsigned int hhf_drop(struct Qdisc *sch)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	struct wdrr_bucket *victim = &q->buckets[WDRR_BUCKET_FOR_HH];
+	struct sk_buff *skb;
+
+	/* Always try to drop from heavy-hitters first. */
+	if (victim->head == NULL)
+		victim = &q->buckets[WDRR_BUCKET_FOR_NON_HH];
+
+	if (victim->head == NULL)
+		return victim - q->buckets;
+
+	skb = dequeue_head(victim);
+	sch->q.qlen--;
+	sch->qstats.drops++;
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+	kfree_skb(skb);
+
+	/* Report which bucket the packet was dropped from. */
+	return victim - q->buckets;
+}
+
+/* Enqueues @skb into the WDRR bucket chosen by hhf_classify().
+ *
+ * A bucket that is not currently scheduled is put on old_buckets (heavy
+ * hitters) or new_buckets (non heavy hitters) with a fresh deficit of
+ * weight * quantum. If the qdisc then holds more than sch->limit packets,
+ * one packet is dropped through hhf_drop().
+ *
+ * Returns NET_XMIT_CN when the packet dropped to make room came from the
+ * same bucket as @skb, NET_XMIT_SUCCESS otherwise.
+ */
+static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	enum wdrr_bucket_idx idx;
+	struct wdrr_bucket *bucket;
+
+	idx = hhf_classify(skb, sch);
+
+	bucket = &q->buckets[idx];
+	bucket_add(bucket, skb);
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+
+	if (list_empty(&bucket->bucketchain)) {
+		unsigned int weight;
+
+		/* The logic of new_buckets vs. old_buckets is the same as
+		 * new_flows vs. old_flows in the implementation of fq_codel,
+		 * i.e., short bursts of non-HHs should have strict priority.
+		 */
+		if (idx == WDRR_BUCKET_FOR_HH) {
+			/* Always move heavy-hitters to old bucket. */
+			weight = 1;
+			list_add_tail(&bucket->bucketchain, &q->old_buckets);
+		} else {
+			weight = q->hhf_non_hh_weight;
+			list_add_tail(&bucket->bucketchain, &q->new_buckets);
+		}
+		bucket->deficit = weight * q->quantum;
+	}
+
+	/* sch->limit is the maximum number of queued packets, so a queue
+	 * holding exactly sch->limit packets is still within bounds. This
+	 * matches hhf_change(), which only trims while qlen > limit.
+	 */
+	if (++sch->q.qlen <= sch->limit)
+		return NET_XMIT_SUCCESS;
+
+	q->drop_overlimit++;
+	/* Return Congestion Notification only if we dropped a packet from this
+	 * bucket.
+	 */
+	if (hhf_drop(sch) == idx)
+		return NET_XMIT_CN;
+
+	/* As we dropped a packet, better let upper stack know this. */
+	qdisc_tree_decrease_qlen(sch, 1);
+	return NET_XMIT_SUCCESS;
+}
+
+/* Dequeues the next packet according to Weighted Deficit Round Robin.
+ *
+ * Buckets on new_buckets are served before those on old_buckets. A bucket
+ * whose deficit is exhausted gets its deficit refilled by weight * quantum
+ * and is moved to the tail of old_buckets. An empty bucket is either moved
+ * to old_buckets or taken off the schedule.
+ *
+ * Returns the dequeued skb, or NULL when no bucket is scheduled.
+ */
+static struct sk_buff *hhf_dequeue(struct Qdisc *sch)
+{
+ struct hhf_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb = NULL;
+ struct wdrr_bucket *bucket;
+ struct list_head *head;
+
+begin:
+ /* Pick the first scheduled bucket, new buckets taking precedence. */
+ head = &q->new_buckets;
+ if (list_empty(head)) {
+ head = &q->old_buckets;
+ if (list_empty(head))
+ return NULL;
+ }
+ bucket = list_first_entry(head, struct wdrr_bucket, bucketchain);
+
+ if (bucket->deficit <= 0) {
+ /* Credit used up: refill it and send the bucket to the back
+ * of old_buckets. The refill must be positive, otherwise this
+ * loop never terminates.
+ */
+ int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ?
+ 1 : q->hhf_non_hh_weight;
+
+ bucket->deficit += weight * q->quantum;
+ list_move_tail(&bucket->bucketchain, &q->old_buckets);
+ goto begin;
+ }
+
+ if (bucket->head) {
+ skb = dequeue_head(bucket);
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ }
+
+ if (!skb) {
+ /* Force a pass through old_buckets to prevent starvation. */
+ if ((head == &q->new_buckets) && !list_empty(&q->old_buckets))
+ list_move_tail(&bucket->bucketchain, &q->old_buckets);
+ else
+ list_del_init(&bucket->bucketchain);
+ goto begin;
+ }
+ qdisc_bstats_update(sch, skb);
+ /* Charge the packet against the bucket's credit for this round. */
+ bucket->deficit -= qdisc_pkt_len(skb);
+
+ return skb;
+}
+
+/* Empties the qdisc by dequeuing and freeing every queued packet. */
+static void hhf_reset(struct Qdisc *sch)
+{
+	struct sk_buff *skb;
+
+	for (skb = hhf_dequeue(sch); skb != NULL; skb = hhf_dequeue(sch))
+		kfree_skb(skb);
+}
+
+/* Allocates @sz zeroed bytes, trying kzalloc() first (without allocation
+ * failure warnings) and falling back to vzalloc(). Returns NULL if both
+ * fail. Memory obtained here must be released with hhf_free().
+ */
+static void *hhf_zalloc(size_t sz)
+{
+	void *mem = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
+
+	if (mem)
+		return mem;
+
+	return vzalloc(sz);
+}
+
+/* Releases memory obtained from hhf_zalloc(), using vfree() or kfree()
+ * depending on where the address lives. A NULL @addr is ignored.
+ */
+static void hhf_free(void *addr)
+{
+	if (!addr)
+		return;
+
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
+/* Frees the filter arrays and their valid bits, every flow entry still
+ * chained in table T, and finally table T itself.
+ */
+static void hhf_destroy(struct Qdisc *sch)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+		hhf_free(q->hhf_arrays[i]);
+		hhf_free(q->hhf_valid_bits[i]);
+	}
+
+	for (i = 0; i < HH_FLOWS_CNT; i++) {
+		struct list_head *chain = &q->hh_flows[i];
+
+		/* Pop and free entries until the chain is empty. */
+		while (!list_empty(chain)) {
+			struct hh_flow_state *entry;
+
+			entry = list_first_entry(chain, struct hh_flow_state,
+						 flowchain);
+			list_del(&entry->flowchain);
+			kfree(entry);
+		}
+	}
+	hhf_free(q->hh_flows);
+}
+
+/* Netlink attribute policy used by hhf_change(): every configurable HHF
+ * attribute is a 32-bit unsigned value.
+ */
+static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
+ [TCA_HHF_BACKLOG_LIMIT] = { .type = NLA_U32 },
+ [TCA_HHF_QUANTUM] = { .type = NLA_U32 },
+ [TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 },
+ [TCA_HHF_RESET_TIMEOUT] = { .type = NLA_U32 },
+ [TCA_HHF_ADMIT_BYTES] = { .type = NLA_U32 },
+ [TCA_HHF_EVICT_TIMEOUT] = { .type = NLA_U32 },
+ [TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 },
+};
+
+/* Applies the netlink-supplied configuration in @opt to the qdisc.
+ *
+ * Attributes that are absent leave the corresponding parameter unchanged.
+ * Timeouts are supplied in milliseconds and stored in jiffies. After the
+ * update, packets in excess of the (possibly new) sch->limit are dropped.
+ *
+ * Returns 0 on success, a negative error from nla_parse_nested(), or
+ * -EINVAL when @opt is NULL or when quantum * non-HH weight is zero or
+ * does not fit in an int.
+ */
+static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_HHF_MAX + 1];
+	unsigned int qlen;
+	int err;
+	u64 non_hh_quantum;
+	u32 new_quantum = q->quantum;
+	u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight;
+
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_HHF_QUANTUM])
+		new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]);
+
+	if (tb[TCA_HHF_NON_HH_WEIGHT])
+		new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]);
+
+	/* Validate before taking the tree lock and before modifying any
+	 * state, so that the error path neither leaks the lock nor leaves a
+	 * half-applied configuration behind.
+	 *
+	 * The product is the deficit refill of the non-HH bucket and is
+	 * stored in an int. A zero quantum or weight would make the refill
+	 * zero and hhf_dequeue() would loop forever on an exhausted bucket.
+	 */
+	non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
+	if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX)
+		return -EINVAL;
+
+	sch_tree_lock(sch);
+
+	if (tb[TCA_HHF_BACKLOG_LIMIT])
+		sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
+
+	q->quantum = new_quantum;
+	q->hhf_non_hh_weight = new_hhf_non_hh_weight;
+
+	if (tb[TCA_HHF_HH_FLOWS_LIMIT])
+		q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);
+
+	if (tb[TCA_HHF_RESET_TIMEOUT]) {
+		u32 ms = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);
+
+		q->hhf_reset_timeout = msecs_to_jiffies(ms);
+	}
+
+	if (tb[TCA_HHF_ADMIT_BYTES])
+		q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);
+
+	if (tb[TCA_HHF_EVICT_TIMEOUT]) {
+		u32 ms = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);
+
+		q->hhf_evict_timeout = msecs_to_jiffies(ms);
+	}
+
+	/* Shrink the queue to the new limit and tell the parents how many
+	 * packets went away.
+	 */
+	qlen = sch->q.qlen;
+	while (sch->q.qlen > sch->limit) {
+		struct sk_buff *skb = hhf_dequeue(sch);
+
+		kfree_skb(skb);
+	}
+	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+/* Initializes a new HHF qdisc: sets the default parameters, applies the
+ * optional netlink configuration @opt, then allocates table T, the filter
+ * arrays with their valid bits, and prepares the WDRR buckets.
+ *
+ * Returns 0 on success, the error from hhf_change() if @opt is rejected, or
+ * -ENOMEM if an allocation fails (already allocated memory is released
+ * through hhf_destroy() in that case).
+ */
+static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	sch->limit = 1000;
+	q->quantum = psched_mtu(qdisc_dev(sch));
+	q->perturbation = net_random();
+	INIT_LIST_HEAD(&q->new_buckets);
+	INIT_LIST_HEAD(&q->old_buckets);
+
+	/* Configurable HHF parameters */
+	q->hhf_reset_timeout = HZ / 25; /* 40 ms */
+	q->hhf_admit_bytes = 131072; /* 128 KB */
+	q->hhf_evict_timeout = HZ; /* 1 sec */
+	q->hhf_non_hh_weight = 2;
+	/* Cap max active HHs at twice len of hh_flows table. This default
+	 * has to be in place before the options are parsed; assigning it
+	 * afterwards would overwrite a user-supplied TCA_HHF_HH_FLOWS_LIMIT.
+	 */
+	q->hh_flows_limit = 2 * HH_FLOWS_CNT;
+
+	if (opt) {
+		int err = hhf_change(sch, opt);
+
+		if (err)
+			return err;
+	}
+
+	if (!q->hh_flows) {
+		/* Initialize heavy-hitter flow table. */
+		q->hh_flows = hhf_zalloc(HH_FLOWS_CNT *
+					 sizeof(struct list_head));
+		if (!q->hh_flows)
+			return -ENOMEM;
+		for (i = 0; i < HH_FLOWS_CNT; i++)
+			INIT_LIST_HEAD(&q->hh_flows[i]);
+
+		q->hh_flows_overlimit = 0;
+		q->hh_flows_total_cnt = 0;
+		q->hh_flows_current_cnt = 0;
+
+		/* Initialize heavy-hitter filter arrays. */
+		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+			q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN *
+						      sizeof(u32));
+			if (!q->hhf_arrays[i]) {
+				hhf_destroy(sch);
+				return -ENOMEM;
+			}
+		}
+		q->hhf_arrays_reset_timestamp = hhf_time_stamp();
+
+		/* Initialize valid bits of heavy-hitter filter arrays. */
+		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
+			q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN /
+							  BITS_PER_BYTE);
+			if (!q->hhf_valid_bits[i]) {
+				hhf_destroy(sch);
+				return -ENOMEM;
+			}
+		}
+
+		/* Initialize Weighted DRR buckets. */
+		for (i = 0; i < WDRR_BUCKET_CNT; i++) {
+			struct wdrr_bucket *bucket = q->buckets + i;
+
+			INIT_LIST_HEAD(&bucket->bucketchain);
+		}
+	}
+
+	return 0;
+}
+
+/* Dumps the current configuration into @skb as a nested TCA_OPTIONS
+ * attribute. Timeouts are reported in milliseconds.
+ *
+ * Returns skb->len on success, or -1 if the message has no room left.
+ */
+static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct hhf_sched_data *q = qdisc_priv(sch);
+	struct nlattr *nest = nla_nest_start(skb, TCA_OPTIONS);
+
+	if (!nest)
+		return -1;
+
+	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
+	    nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
+	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
+	    nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
+			jiffies_to_msecs(q->hhf_reset_timeout)) ||
+	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
+	    nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
+			jiffies_to_msecs(q->hhf_evict_timeout)) ||
+	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
+		return -1;
+
+	nla_nest_end(skb, nest);
+
+	return skb->len;
+}
+
+/* Exports the HHF-specific counters as application statistics. */
+static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct hhf_sched_data *priv = qdisc_priv(sch);
+	struct tc_hhf_xstats xstats = {
+		.drop_overlimit	= priv->drop_overlimit,
+		.hh_overlimit	= priv->hh_flows_overlimit,
+		.hh_tot_count	= priv->hh_flows_total_cnt,
+		.hh_cur_count	= priv->hh_flows_current_cnt,
+	};
+
+	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+}
+
+/* Operations table registered for the "hhf" qdisc. peek uses the generic
+ * qdisc_peek_dequeued helper; all other callbacks are defined in this file.
+ */
+static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
+ .id = "hhf",
+ .priv_size = sizeof(struct hhf_sched_data),
+
+ .enqueue = hhf_enqueue,
+ .dequeue = hhf_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .drop = hhf_drop,
+ .init = hhf_init,
+ .reset = hhf_reset,
+ .destroy = hhf_destroy,
+ .change = hhf_change,
+ .dump = hhf_dump,
+ .dump_stats = hhf_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: registers the "hhf" qdisc operations. */
+static int __init hhf_module_init(void)
+{
+	int ret = register_qdisc(&hhf_qdisc_ops);
+
+	return ret;
+}
+
+/* Module exit point: unregisters the "hhf" qdisc operations. */
+static void __exit hhf_module_exit(void)
+{
+	struct Qdisc_ops *ops = &hhf_qdisc_ops;
+
+	unregister_qdisc(ops);
+}
+
+module_init(hhf_module_init)
+module_exit(hhf_module_exit)
+MODULE_AUTHOR("Terry Lam");
+MODULE_AUTHOR("Nandita Dukkipati");
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 830c64f25539..0db5a6eae87f 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -712,7 +712,7 @@ static s64 htb_do_events(struct htb_sched *q, const int level,
/* too much load - let's continue after a break for scheduling */
if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
- pr_warning("htb: too many events!\n");
+ pr_warn("htb: too many events!\n");
q->warned |= HTB_WARN_TOOMANYEVENTS;
}
@@ -1276,9 +1276,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
struct Qdisc *new_q = NULL;
int last_child = 0;
- // TODO: why don't allow to delete subtree ? references ? does
- // tc subsys quarantee us that in htb_destroy it holds no class
- // refs so that we can remove children safely there ?
+ /* TODO: why don't allow to delete subtree ? references ? does
+ * tc subsys guarantee us that in htb_destroy it holds no class
+ * refs so that we can remove children safely there ?
+ */
if (cl->children || cl->filter_cnt)
return -EBUSY;
@@ -1471,21 +1472,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
sch_tree_lock(sch);
}
+ rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+
+ ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+
+ psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
+ psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
+
/* it used to be a nasty bug here, we have to check that node
* is really leaf before changing cl->un.leaf !
*/
if (!cl->level) {
- cl->quantum = hopt->rate.rate / q->rate2quantum;
+ u64 quantum = cl->rate.rate_bytes_ps;
+
+ do_div(quantum, q->rate2quantum);
+ cl->quantum = min_t(u64, quantum, INT_MAX);
+
if (!hopt->quantum && cl->quantum < 1000) {
- pr_warning(
- "HTB: quantum of class %X is small. Consider r2q change.\n",
- cl->common.classid);
+ pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n",
+ cl->common.classid);
cl->quantum = 1000;
}
if (!hopt->quantum && cl->quantum > 200000) {
- pr_warning(
- "HTB: quantum of class %X is big. Consider r2q change.\n",
- cl->common.classid);
+ pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n",
+ cl->common.classid);
cl->quantum = 200000;
}
if (hopt->quantum)
@@ -1494,13 +1504,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->prio = TC_HTB_NUMPRIO - 1;
}
- rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
-
- ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
-
- psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
- psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
-
cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index f3befd6b4781..090a4e3ecd0d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -88,7 +88,7 @@ struct netem_sched_data {
u32 duplicate;
u32 reorder;
u32 corrupt;
- u32 rate;
+ u64 rate;
s32 packet_overhead;
u32 cell_size;
u32 cell_size_reciprocal;
@@ -495,7 +495,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
now = netem_skb_cb(last)->time_to_send;
}
- delay += packet_len_2_sched_time(skb->len, q);
+ delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
}
cb->time_to_send = now + delay;
@@ -782,6 +782,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
[TCA_NETEM_LOSS] = { .type = NLA_NESTED },
[TCA_NETEM_ECN] = { .type = NLA_U32 },
+ [TCA_NETEM_RATE64] = { .type = NLA_U64 },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -852,6 +853,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_NETEM_RATE])
get_rate(sch, tb[TCA_NETEM_RATE]);
+ if (tb[TCA_NETEM_RATE64])
+ q->rate = max_t(u64, q->rate,
+ nla_get_u64(tb[TCA_NETEM_RATE64]));
+
if (tb[TCA_NETEM_ECN])
q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
@@ -974,7 +979,13 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
goto nla_put_failure;
- rate.rate = q->rate;
+ if (q->rate >= (1ULL << 32)) {
+ if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate))
+ goto nla_put_failure;
+ rate.rate = ~0U;
+ } else {
+ rate.rate = q->rate;
+ }
rate.packet_overhead = q->packet_overhead;
rate.cell_size = q->cell_size;
rate.cell_overhead = q->cell_overhead;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a6090051c5db..fbba5b0ec121 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -118,6 +118,32 @@ struct tbf_sched_data {
};
+/* Time to Length: converts a time in ns to a length in bytes, i.e.
+ * determines how many bytes can be sent in the given time at rate @r.
+ */
+static u64 psched_ns_t2l(const struct psched_ratecfg *r,
+			 u64 time_in_ns)
+{
+	/* bytes = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC */
+	u64 bytes = time_in_ns * r->rate_bytes_ps;
+
+	do_div(bytes, NSEC_PER_SEC);
+
+	/* For an ATM link layer, scale the result by 48/53. */
+	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
+		do_div(bytes, 53);
+		bytes *= 48;
+	}
+
+	/* Take the configured overhead off, never going below zero. */
+	return bytes > r->overhead ? bytes - r->overhead : 0;
+}
+
/*
* Return length of individual segments of a gso packet,
* including all headers (MAC, IP, TCP/UDP)
@@ -281,6 +307,8 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
[TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_TBF_RATE64] = { .type = NLA_U64 },
[TCA_TBF_PRATE64] = { .type = NLA_U64 },
+ [TCA_TBF_BURST] = { .type = NLA_U32 },
+ [TCA_TBF_PBURST] = { .type = NLA_U32 },
};
static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
@@ -289,10 +317,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
struct tbf_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_TBF_MAX + 1];
struct tc_tbf_qopt *qopt;
- struct qdisc_rate_table *rtab = NULL;
- struct qdisc_rate_table *ptab = NULL;
struct Qdisc *child = NULL;
- int max_size, n;
+ struct psched_ratecfg rate;
+ struct psched_ratecfg peak;
+ u64 max_size;
+ s64 buffer, mtu;
u64 rate64 = 0, prate64 = 0;
err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
@@ -304,38 +333,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
goto done;
qopt = nla_data(tb[TCA_TBF_PARMS]);
- rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
- if (rtab == NULL)
- goto done;
+ if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
+ qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
+ tb[TCA_TBF_RTAB]));
- if (qopt->peakrate.rate) {
- if (qopt->peakrate.rate > qopt->rate.rate)
- ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
- if (ptab == NULL)
- goto done;
- }
-
- for (n = 0; n < 256; n++)
- if (rtab->data[n] > qopt->buffer)
- break;
- max_size = (n << qopt->rate.cell_log) - 1;
- if (ptab) {
- int size;
-
- for (n = 0; n < 256; n++)
- if (ptab->data[n] > qopt->mtu)
- break;
- size = (n << qopt->peakrate.cell_log) - 1;
- if (size < max_size)
- max_size = size;
- }
- if (max_size < 0)
- goto done;
-
- if (max_size < psched_mtu(qdisc_dev(sch)))
- pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n",
- max_size, qdisc_dev(sch)->name,
- psched_mtu(qdisc_dev(sch)));
+ if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
+ qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
+ tb[TCA_TBF_PTAB]));
if (q->qdisc != &noop_qdisc) {
err = fifo_set_limit(q->qdisc, qopt->limit);
@@ -349,6 +353,50 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
}
}
+ buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
+ mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
+
+ if (tb[TCA_TBF_RATE64])
+ rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
+ psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
+
+ if (tb[TCA_TBF_BURST]) {
+ max_size = nla_get_u32(tb[TCA_TBF_BURST]);
+ buffer = psched_l2t_ns(&rate, max_size);
+ } else {
+ max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
+ }
+
+ if (qopt->peakrate.rate) {
+ if (tb[TCA_TBF_PRATE64])
+ prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
+ psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
+ if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
+ pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
+ peak.rate_bytes_ps, rate.rate_bytes_ps);
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (tb[TCA_TBF_PBURST]) {
+ u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
+ max_size = min_t(u32, max_size, pburst);
+ mtu = psched_l2t_ns(&peak, pburst);
+ } else {
+ max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
+ }
+ }
+
+ if (max_size < psched_mtu(qdisc_dev(sch)))
+ pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
+ max_size, qdisc_dev(sch)->name,
+ psched_mtu(qdisc_dev(sch)));
+
+ if (!max_size) {
+ err = -EINVAL;
+ goto done;
+ }
+
sch_tree_lock(sch);
if (child) {
qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
@@ -356,19 +404,21 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->qdisc = child;
}
q->limit = qopt->limit;
- q->mtu = PSCHED_TICKS2NS(qopt->mtu);
+ if (tb[TCA_TBF_PBURST])
+ q->mtu = mtu;
+ else
+ q->mtu = PSCHED_TICKS2NS(qopt->mtu);
q->max_size = max_size;
- q->buffer = PSCHED_TICKS2NS(qopt->buffer);
+ if (tb[TCA_TBF_BURST])
+ q->buffer = buffer;
+ else
+ q->buffer = PSCHED_TICKS2NS(qopt->buffer);
q->tokens = q->buffer;
q->ptokens = q->mtu;
- if (tb[TCA_TBF_RATE64])
- rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
- psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64);
- if (ptab) {
- if (tb[TCA_TBF_PRATE64])
- prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
- psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64);
+ memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
+ if (qopt->peakrate.rate) {
+ memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
q->peak_present = true;
} else {
q->peak_present = false;
@@ -377,10 +427,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_unlock(sch);
err = 0;
done:
- if (rtab)
- qdisc_put_rtab(rtab);
- if (ptab)
- qdisc_put_rtab(ptab);
return err;
}