summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig13
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_ctinfo.c11
-rw-r--r--net/sched/act_ife.c7
-rw-r--r--net/sched/cls_api.c5
-rw-r--r--net/sched/cls_basic.c11
-rw-r--r--net/sched/cls_bpf.c11
-rw-r--r--net/sched/cls_flower.c12
-rw-r--r--net/sched/cls_fw.c11
-rw-r--r--net/sched/cls_matchall.c12
-rw-r--r--net/sched/cls_route.c11
-rw-r--r--net/sched/cls_rsvp.h17
-rw-r--r--net/sched/cls_tcindex.c54
-rw-r--r--net/sched/cls_u32.c11
-rw-r--r--net/sched/ematch.c5
-rw-r--r--net/sched/sch_api.c47
-rw-r--r--net/sched/sch_cake.c6
-rw-r--r--net/sched/sch_choke.c2
-rw-r--r--net/sched/sch_fq.c6
-rw-r--r--net/sched/sch_fq_pie.c562
-rw-r--r--net/sched/sch_pie.c289
-rw-r--r--net/sched/sch_prio.c10
-rw-r--r--net/sched/sch_taprio.c92
-rw-r--r--net/sched/sch_tbf.c60
24 files changed, 959 insertions, 307 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index b1e7ec726958..edde0e519438 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -366,6 +366,19 @@ config NET_SCH_PIE
If unsure, say N.
+config NET_SCH_FQ_PIE
+ depends on NET_SCH_PIE
+ tristate "Flow Queue Proportional Integral controller Enhanced (FQ-PIE)"
+ help
+ Say Y here if you want to use the Flow Queue Proportional Integral
+ controller Enhanced (FQ-PIE) packet scheduling algorithm.
+ For more information, please see https://tools.ietf.org/html/rfc8033
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_fq_pie.
+
+ If unsure, say N.
+
config NET_SCH_INGRESS
tristate "Ingress/classifier-action Qdisc"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index bc8856b865ff..31c367a6cd09 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_NET_SCH_CAKE) += sch_cake.o
obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o
obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o
+obj-$(CONFIG_NET_SCH_FQ_PIE) += sch_fq_pie.o
obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o
obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 40038c321b4a..19649623493b 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -360,6 +360,16 @@ static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index)
return tcf_idr_search(tn, a, index);
}
+static void tcf_ctinfo_cleanup(struct tc_action *a)
+{
+ struct tcf_ctinfo *ci = to_ctinfo(a);
+ struct tcf_ctinfo_params *cp;
+
+ cp = rcu_dereference_protected(ci->params, 1);
+ if (cp)
+ kfree_rcu(cp, rcu);
+}
+
static struct tc_action_ops act_ctinfo_ops = {
.kind = "ctinfo",
.id = TCA_ID_CTINFO,
@@ -367,6 +377,7 @@ static struct tc_action_ops act_ctinfo_ops = {
.act = tcf_ctinfo_act,
.dump = tcf_ctinfo_dump,
.init = tcf_ctinfo_init,
+ .cleanup= tcf_ctinfo_cleanup,
.walk = tcf_ctinfo_walker,
.lookup = tcf_ctinfo_search,
.size = sizeof(struct tcf_ctinfo),
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 5e6379028fc3..c1fcd85719d6 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -537,6 +537,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
ife = to_ife(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&ife->metalist);
+
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
@@ -566,10 +569,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
p->eth_type = ife_type;
}
-
- if (ret == ACT_P_CREATED)
- INIT_LIST_HEAD(&ife->metalist);
-
if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested_deprecated(tb2, IFE_META_MAX,
tb[TCA_IFE_METALST], NULL,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 76e0d122616a..c2cdd0fc2e70 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2055,9 +2055,8 @@ replay:
&chain_info));
mutex_unlock(&chain->filter_chain_lock);
- tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, prio, chain, rtnl_held,
- extack);
+ tp_new = tcf_proto_create(name, protocol, prio, chain,
+ rtnl_held, extack);
if (IS_ERR(tp_new)) {
err = PTR_ERR(tp_new);
goto errout_tp;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 4aafbe3d435c..f256a7c69093 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -263,12 +263,17 @@ skip:
}
}
-static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
+static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct basic_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 8229ed4a67be..6e3e63db0e01 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -631,12 +631,17 @@ nla_put_failure:
return -1;
}
-static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
+static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl,
+ void *q, unsigned long base)
{
struct cls_bpf_prog *prog = fh;
- if (prog && prog->res.classid == classid)
- prog->res.class = cl;
+ if (prog && prog->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &prog->res, base);
+ else
+ __tcf_unbind_filter(q, &prog->res);
+ }
}
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b0f42e62dd76..7e54d2ab5254 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -691,6 +691,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
.len = 128 / BITS_PER_BYTE },
[TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY,
.len = 128 / BITS_PER_BYTE },
+ [TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
};
static const struct nla_policy
@@ -2765,12 +2766,17 @@ nla_put_failure:
return -EMSGSIZE;
}
-static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
+static void fl_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct cls_fl_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static bool fl_delete_empty(struct tcf_proto *tp)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index c9496c920d6f..ec945294626a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -419,12 +419,17 @@ nla_put_failure:
return -1;
}
-static void fw_bind_class(void *fh, u32 classid, unsigned long cl)
+static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct fw_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops cls_fw_ops __read_mostly = {
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 7fc2eb62aa98..610a0b728161 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -157,6 +157,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle)
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
[TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
+ [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
@@ -393,12 +394,17 @@ nla_put_failure:
return -1;
}
-static void mall_bind_class(void *fh, u32 classid, unsigned long cl)
+static void mall_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct cls_mall_head *head = fh;
- if (head && head->res.classid == classid)
- head->res.class = cl;
+ if (head && head->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &head->res, base);
+ else
+ __tcf_unbind_filter(q, &head->res);
+ }
}
static struct tcf_proto_ops cls_mall_ops __read_mostly = {
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 2d9e0b4484ea..6f8786b06bde 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -641,12 +641,17 @@ nla_put_failure:
return -1;
}
-static void route4_bind_class(void *fh, u32 classid, unsigned long cl)
+static void route4_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct route4_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops cls_route4_ops __read_mostly = {
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 2f3c03b25d5d..d36949d9382c 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -463,10 +463,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
[TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .type = NLA_BINARY,
- .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
[TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
};
@@ -738,12 +736,17 @@ nla_put_failure:
return -1;
}
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
+static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct rsvp_filter *f = fh;
- if (f && f->res.classid == classid)
- f->res.class = cl;
+ if (f && f->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &f->res, base);
+ else
+ __tcf_unbind_filter(q, &f->res);
+ }
}
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index e573e5a5c794..09b7dc5fe7e0 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -333,12 +333,31 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
cp->fall_through = p->fall_through;
cp->tp = tp;
+ if (tb[TCA_TCINDEX_HASH])
+ cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+
+ if (tb[TCA_TCINDEX_MASK])
+ cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+
+ if (tb[TCA_TCINDEX_SHIFT])
+ cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+
+ if (!cp->hash) {
+ /* Hash not specified, use perfect hash if the upper limit
+ * of the hashing index is below the threshold.
+ */
+ if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
+ cp->hash = (cp->mask >> cp->shift) + 1;
+ else
+ cp->hash = DEFAULT_HASH_SIZE;
+ }
+
if (p->perfect) {
int i;
if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
- for (i = 0; i < cp->hash; i++)
+ for (i = 0; i < min(cp->hash, p->hash); i++)
cp->perfect[i].res = p->perfect[i].res;
balloc = 1;
}
@@ -346,19 +365,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcindex_filter_result_init(&new_filter_result, net);
if (err < 0)
- goto errout1;
+ goto errout_alloc;
if (old_r)
cr = r->res;
- if (tb[TCA_TCINDEX_HASH])
- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-
- if (tb[TCA_TCINDEX_MASK])
- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-
- if (tb[TCA_TCINDEX_SHIFT])
- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-
err = -EBUSY;
/* Hash already allocated, make sure that we still meet the
@@ -376,16 +386,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (tb[TCA_TCINDEX_FALL_THROUGH])
cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
- if (!cp->hash) {
- /* Hash not specified, use perfect hash if the upper limit
- * of the hashing index is below the threshold.
- */
- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
- cp->hash = (cp->mask >> cp->shift) + 1;
- else
- cp->hash = DEFAULT_HASH_SIZE;
- }
-
if (!cp->perfect && !cp->h)
cp->alloc_hash = cp->hash;
@@ -484,7 +484,6 @@ errout_alloc:
tcindex_free_perfect_hash(cp);
else if (balloc == 2)
kfree(cp->h);
-errout1:
tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
@@ -654,12 +653,17 @@ nla_put_failure:
return -1;
}
-static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl)
+static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
+ void *q, unsigned long base)
{
struct tcindex_filter_result *r = fh;
- if (r && r->res.classid == classid)
- r->res.class = cl;
+ if (r && r->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &r->res, base);
+ else
+ __tcf_unbind_filter(q, &r->res);
+ }
}
static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index a0e6fac613de..e15ff335953d 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1255,12 +1255,17 @@ static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
return 0;
}
-static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
+static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
+ unsigned long base)
{
struct tc_u_knode *n = fh;
- if (n && n->res.classid == classid)
- n->res.class = cl;
+ if (n && n->res.classid == classid) {
+ if (cl)
+ __tcf_bind_filter(q, &n->res, base);
+ else
+ __tcf_unbind_filter(q, &n->res);
+ }
}
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 8f2ad706784d..dd3b8c11a2e0 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -238,6 +238,9 @@ static int tcf_em_validate(struct tcf_proto *tp,
goto errout;
if (em->ops->change) {
+ err = -EINVAL;
+ if (em_hdr->flags & TCF_EM_SIMPLE)
+ goto errout;
err = em->ops->change(net, data, data_len, em);
if (err < 0)
goto errout;
@@ -263,12 +266,12 @@ static int tcf_em_validate(struct tcf_proto *tp,
}
em->data = (unsigned long) v;
}
+ em->datalen = data_len;
}
}
em->matchid = em_hdr->matchid;
em->flags = em_hdr->flags;
- em->datalen = data_len;
em->net = net;
err = 0;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1047825d9f48..50794125bf02 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1891,8 +1891,9 @@ static int tclass_del_notify(struct net *net,
struct tcf_bind_args {
struct tcf_walker w;
- u32 classid;
+ unsigned long base;
unsigned long cl;
+ u32 classid;
};
static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -1903,28 +1904,30 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
struct Qdisc *q = tcf_block_q(tp->chain->block);
sch_tree_lock(q);
- tp->ops->bind_class(n, a->classid, a->cl);
+ tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
sch_tree_unlock(q);
}
return 0;
}
-static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
- unsigned long new_cl)
+struct tc_bind_class_args {
+ struct qdisc_walker w;
+ unsigned long new_cl;
+ u32 portid;
+ u32 clid;
+};
+
+static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
+ struct qdisc_walker *w)
{
+ struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
const struct Qdisc_class_ops *cops = q->ops->cl_ops;
struct tcf_block *block;
struct tcf_chain *chain;
- unsigned long cl;
- cl = cops->find(q, portid);
- if (!cl)
- return;
- if (!cops->tcf_block)
- return;
block = cops->tcf_block(q, cl, NULL);
if (!block)
- return;
+ return 0;
for (chain = tcf_get_next_chain(block, NULL);
chain;
chain = tcf_get_next_chain(block, chain)) {
@@ -1935,11 +1938,29 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
struct tcf_bind_args arg = {};
arg.w.fn = tcf_node_bind;
- arg.classid = clid;
- arg.cl = new_cl;
+ arg.classid = a->clid;
+ arg.base = cl;
+ arg.cl = a->new_cl;
tp->ops->walk(tp, &arg.w, true);
}
}
+
+ return 0;
+}
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+ unsigned long new_cl)
+{
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+ struct tc_bind_class_args args = {};
+
+ if (!cops->tcf_block)
+ return;
+ args.portid = portid;
+ args.clid = clid;
+ args.new_cl = new_cl;
+ args.w.fn = tc_bind_class_walker;
+ q->ops->cl_ops->walk(q, &args.w);
}
#else
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 6cc3ab145513..1496e87cd07b 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1682,8 +1682,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (IS_ERR_OR_NULL(segs))
return qdisc_drop(skb, sch, to_free);
- while (segs) {
- nskb = segs->next;
+ skb_list_walk_safe(segs, segs, nskb) {
skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
cobalt_set_enqueue_time(segs, now);
@@ -1696,7 +1695,6 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
slen += segs->len;
q->buffer_used += segs->truesize;
b->packets++;
- segs = nskb;
}
/* stats */
@@ -1768,7 +1766,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
q->avg_window_begin));
u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
- do_div(b, window_interval);
+ b = div64_u64(b, window_interval);
q->avg_peak_bandwidth =
cake_ewma(q->avg_peak_bandwidth, b,
b > q->avg_peak_bandwidth ? 2 : 8);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index dba70377bbd9..a36974e9c601 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -377,7 +377,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
if (mask != q->tab_mask) {
struct sk_buff **ntab;
- ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO);
+ ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
if (!ntab)
return -ENOMEM;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index ff4c5e9d0d77..a5a295477ecc 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -786,10 +786,12 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_QUANTUM]) {
u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
- if (quantum > 0)
+ if (quantum > 0 && quantum <= (1 << 20)) {
q->quantum = quantum;
- else
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
err = -EINVAL;
+ }
}
if (tb[TCA_FQ_INITIAL_QUANTUM])
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
new file mode 100644
index 000000000000..214657eb3dfd
--- /dev/null
+++ b/net/sched/sch_fq_pie.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Flow Queue PIE discipline
+ *
+ * Copyright (C) 2019 Mohit P. Tahiliani <tahiliani@nitk.edu.in>
+ * Copyright (C) 2019 Sachin D. Patil <sdp.sachin@gmail.com>
+ * Copyright (C) 2019 V. Saicharan <vsaicharan1998@gmail.com>
+ * Copyright (C) 2019 Mohit Bhasi <mohitbhasi1998@gmail.com>
+ * Copyright (C) 2019 Leslie Monis <lesliemonis@gmail.com>
+ * Copyright (C) 2019 Gautam Ramakrishnan <gautamramk@gmail.com>
+ */
+
+#include <linux/jhash.h>
+#include <linux/sizes.h>
+#include <linux/vmalloc.h>
+#include <net/pkt_cls.h>
+#include <net/pie.h>
+
+/* Flow Queue PIE
+ *
+ * Principles:
+ * - Packets are classified on flows.
+ * - This is a Stochastic model (as we use a hash, several flows might
+ * be hashed to the same slot)
+ * - Each flow has a PIE managed queue.
+ * - Flows are linked onto two (Round Robin) lists,
+ * so that new flows have priority on old ones.
+ * - For a given flow, packets are not reordered.
+ * - Drops during enqueue only.
+ * - ECN capability is off by default.
+ * - ECN threshold (if ECN is enabled) is at 10% by default.
+ * - Uses timestamps to calculate queue delay by default.
+ */
+
+/**
+ * struct fq_pie_flow - contains data for each flow
+ * @vars: pie vars associated with the flow
+ * @deficit: number of remaining byte credits
+ * @backlog: size of data in the flow
+ * @qlen: number of packets in the flow
+ * @flowchain: flowchain for the flow
+ * @head: first packet in the flow
+ * @tail: last packet in the flow
+ */
+struct fq_pie_flow {
+ struct pie_vars vars;
+ s32 deficit;
+ u32 backlog;
+ u32 qlen;
+ struct list_head flowchain;
+ struct sk_buff *head;
+ struct sk_buff *tail;
+};
+
+struct fq_pie_sched_data {
+ struct tcf_proto __rcu *filter_list; /* optional external classifier */
+ struct tcf_block *block;
+ struct fq_pie_flow *flows;
+ struct Qdisc *sch;
+ struct list_head old_flows;
+ struct list_head new_flows;
+ struct pie_params p_params;
+ u32 ecn_prob;
+ u32 flows_cnt;
+ u32 quantum;
+ u32 memory_limit;
+ u32 new_flow_count;
+ u32 memory_usage;
+ u32 overmemory;
+ struct pie_stats stats;
+ struct timer_list adapt_timer;
+};
+
+static unsigned int fq_pie_hash(const struct fq_pie_sched_data *q,
+ struct sk_buff *skb)
+{
+ return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
+}
+
+static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
+ int *qerr)
+{
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+ struct tcf_proto *filter;
+ struct tcf_result res;
+ int result;
+
+ if (TC_H_MAJ(skb->priority) == sch->handle &&
+ TC_H_MIN(skb->priority) > 0 &&
+ TC_H_MIN(skb->priority) <= q->flows_cnt)
+ return TC_H_MIN(skb->priority);
+
+ filter = rcu_dereference_bh(q->filter_list);
+ if (!filter)
+ return fq_pie_hash(q, skb) + 1;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ result = tcf_classify(skb, filter, &res, false);
+ if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+ switch (result) {
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
+ case TC_ACT_SHOT:
+ return 0;
+ }
+#endif
+ if (TC_H_MIN(res.classid) <= q->flows_cnt)
+ return TC_H_MIN(res.classid);
+ }
+ return 0;
+}
+
+/* add skb to flow queue (tail add) */
+static inline void flow_queue_add(struct fq_pie_flow *flow,
+ struct sk_buff *skb)
+{
+ if (!flow->head)
+ flow->head = skb;
+ else
+ flow->tail->next = skb;
+ flow->tail = skb;
+ skb->next = NULL;
+}
+
+static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+ struct fq_pie_flow *sel_flow;
+ int uninitialized_var(ret);
+ u8 memory_limited = false;
+ u8 enqueue = false;
+ u32 pkt_len;
+ u32 idx;
+
+ /* Classifies packet into corresponding flow */
+ idx = fq_pie_classify(skb, sch, &ret);
+ sel_flow = &q->flows[idx];
+
+ /* Checks whether adding a new packet would exceed memory limit */
+ get_pie_cb(skb)->mem_usage = skb->truesize;
+ memory_limited = q->memory_usage > q->memory_limit + skb->truesize;
+
+ /* Checks if the qdisc is full */
+ if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
+ q->stats.overlimit++;
+ goto out;
+ } else if (unlikely(memory_limited)) {
+ q->overmemory++;
+ }
+
+ if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars,
+ sel_flow->backlog, skb->len)) {
+ enqueue = true;
+ } else if (q->p_params.ecn &&
+ sel_flow->vars.prob <= (MAX_PROB / 100) * q->ecn_prob &&
+ INET_ECN_set_ce(skb)) {
+ /* If packet is ecn capable, mark it if drop probability
+ * is lower than the parameter ecn_prob, else drop it.
+ */
+ q->stats.ecn_mark++;
+ enqueue = true;
+ }
+ if (enqueue) {
+ /* Set enqueue time only when dq_rate_estimator is disabled. */
+ if (!q->p_params.dq_rate_estimator)
+ pie_set_enqueue_time(skb);
+
+ pkt_len = qdisc_pkt_len(skb);
+ q->stats.packets_in++;
+ q->memory_usage += skb->truesize;
+ sch->qstats.backlog += pkt_len;
+ sch->q.qlen++;
+ flow_queue_add(sel_flow, skb);
+ if (list_empty(&sel_flow->flowchain)) {
+ list_add_tail(&sel_flow->flowchain, &q->new_flows);
+ q->new_flow_count++;
+ sel_flow->deficit = q->quantum;
+ sel_flow->qlen = 0;
+ sel_flow->backlog = 0;
+ }
+ sel_flow->qlen++;
+ sel_flow->backlog += pkt_len;
+ return NET_XMIT_SUCCESS;
+ }
+out:
+ q->stats.dropped++;
+ sel_flow->vars.accu_prob = 0;
+ sel_flow->vars.accu_prob_overflows = 0;
+ __qdisc_drop(skb, to_free);
+ qdisc_qstats_drop(sch);
+ return NET_XMIT_CN;
+}
+
+static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
+ [TCA_FQ_PIE_LIMIT] = {.type = NLA_U32},
+ [TCA_FQ_PIE_FLOWS] = {.type = NLA_U32},
+ [TCA_FQ_PIE_TARGET] = {.type = NLA_U32},
+ [TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32},
+ [TCA_FQ_PIE_ALPHA] = {.type = NLA_U32},
+ [TCA_FQ_PIE_BETA] = {.type = NLA_U32},
+ [TCA_FQ_PIE_QUANTUM] = {.type = NLA_U32},
+ [TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32},
+ [TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32},
+ [TCA_FQ_PIE_ECN] = {.type = NLA_U32},
+ [TCA_FQ_PIE_BYTEMODE] = {.type = NLA_U32},
+ [TCA_FQ_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32},
+};
+
+static inline struct sk_buff *dequeue_head(struct fq_pie_flow *flow)
+{
+ struct sk_buff *skb = flow->head;
+
+ flow->head = skb->next;
+ skb->next = NULL;
+ return skb;
+}
+
+static struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch)
+{
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb = NULL;
+ struct fq_pie_flow *flow;
+ struct list_head *head;
+ u32 pkt_len;
+
+begin:
+ head = &q->new_flows;
+ if (list_empty(head)) {
+ head = &q->old_flows;
+ if (list_empty(head))
+ return NULL;
+ }
+
+ flow = list_first_entry(head, struct fq_pie_flow, flowchain);
+ /* Flow has exhausted all its credits */
+ if (flow->deficit <= 0) {
+ flow->deficit += q->quantum;
+ list_move_tail(&flow->flowchain, &q->old_flows);
+ goto begin;
+ }
+
+ if (flow->head) {
+ skb = dequeue_head(flow);
+ pkt_len = qdisc_pkt_len(skb);
+ sch->qstats.backlog -= pkt_len;
+ sch->q.qlen--;
+ qdisc_bstats_update(sch, skb);
+ }
+
+ if (!skb) {
+ /* force a pass through old_flows to prevent starvation */
+ if (head == &q->new_flows && !list_empty(&q->old_flows))
+ list_move_tail(&flow->flowchain, &q->old_flows);
+ else
+ list_del_init(&flow->flowchain);
+ goto begin;
+ }
+
+ flow->qlen--;
+ flow->deficit -= pkt_len;
+ flow->backlog -= pkt_len;
+ q->memory_usage -= get_pie_cb(skb)->mem_usage;
+ pie_process_dequeue(skb, &q->p_params, &flow->vars, flow->backlog);
+ return skb;
+}
+
+static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_FQ_PIE_MAX + 1];
+ unsigned int len_dropped = 0;
+ unsigned int num_dropped = 0;
+ int err;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack);
+ if (err < 0)
+ return err;
+
+ sch_tree_lock(sch);
+ if (tb[TCA_FQ_PIE_LIMIT]) {
+ u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]);
+
+ q->p_params.limit = limit;
+ sch->limit = limit;
+ }
+ if (tb[TCA_FQ_PIE_FLOWS]) {
+ if (q->flows) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Number of flows cannot be changed");
+ goto flow_error;
+ }
+ q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
+ if (!q->flows_cnt || q->flows_cnt > 65536) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Number of flows must be < 65536");
+ goto flow_error;
+ }
+ }
+
+ /* convert from microseconds to pschedtime */
+ if (tb[TCA_FQ_PIE_TARGET]) {
+ /* target is in us */
+ u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]);
+
+ /* convert to pschedtime */
+ q->p_params.target =
+ PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+ }
+
+ /* tupdate is in jiffies */
+ if (tb[TCA_FQ_PIE_TUPDATE])
+ q->p_params.tupdate =
+ usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]));
+
+ if (tb[TCA_FQ_PIE_ALPHA])
+ q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]);
+
+ if (tb[TCA_FQ_PIE_BETA])
+ q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]);
+
+ if (tb[TCA_FQ_PIE_QUANTUM])
+ q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]);
+
+ if (tb[TCA_FQ_PIE_MEMORY_LIMIT])
+ q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]);
+
+ if (tb[TCA_FQ_PIE_ECN_PROB])
+ q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]);
+
+ if (tb[TCA_FQ_PIE_ECN])
+ q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]);
+
+ if (tb[TCA_FQ_PIE_BYTEMODE])
+ q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]);
+
+ if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])
+ q->p_params.dq_rate_estimator =
+ nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]);
+
+ /* Drop excess packets if new limit is lower */
+ while (sch->q.qlen > sch->limit) {
+ struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);
+
+ len_dropped += qdisc_pkt_len(skb);
+ num_dropped += 1;
+ rtnl_kfree_skbs(skb, skb);
+ }
+ qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped);
+
+ sch_tree_unlock(sch);
+ return 0;
+
+flow_error:
+ sch_tree_unlock(sch);
+ return -EINVAL;
+}
+
+static void fq_pie_timer(struct timer_list *t)
+{
+ struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
+ struct Qdisc *sch = q->sch;
+ spinlock_t *root_lock; /* to lock qdisc for probability calculations */
+ u16 idx;
+
+ root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spin_lock(root_lock);
+
+ for (idx = 0; idx < q->flows_cnt; idx++)
+ pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
+ q->flows[idx].backlog);
+
+ /* reset the timer to fire after 'tupdate' jiffies. */
+ if (q->p_params.tupdate)
+ mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
+
+ spin_unlock(root_lock);
+}
+
+static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+ int err;
+ u16 idx;
+
+ pie_params_init(&q->p_params);
+ sch->limit = 10 * 1024;
+ q->p_params.limit = sch->limit;
+ q->quantum = psched_mtu(qdisc_dev(sch));
+ q->sch = sch;
+ q->ecn_prob = 10;
+ q->flows_cnt = 1024;
+ q->memory_limit = SZ_32M;
+
+ INIT_LIST_HEAD(&q->new_flows);
+ INIT_LIST_HEAD(&q->old_flows);
+
+ if (opt) {
+ err = fq_pie_change(sch, opt, extack);
+
+ if (err)
+ return err;
+ }
+
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+ if (err)
+ goto init_failure;
+
+ q->flows = kvcalloc(q->flows_cnt, sizeof(struct fq_pie_flow),
+ GFP_KERNEL);
+ if (!q->flows) {
+ err = -ENOMEM;
+ goto init_failure;
+ }
+ for (idx = 0; idx < q->flows_cnt; idx++) {
+ struct fq_pie_flow *flow = q->flows + idx;
+
+ INIT_LIST_HEAD(&flow->flowchain);
+ pie_vars_init(&flow->vars);
+ }
+
+ timer_setup(&q->adapt_timer, fq_pie_timer, 0);
+ mod_timer(&q->adapt_timer, jiffies + HZ / 2);
+
+ return 0;
+
+init_failure:
+ q->flows_cnt = 0;
+
+ return err;
+}
+
+static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+ struct nlattr *opts;
+
+ opts = nla_nest_start(skb, TCA_OPTIONS);
+ if (!opts)
+ return -EMSGSIZE;
+
+ /* convert target from pschedtime to us */
+ if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
+ nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
+ nla_put_u32(skb, TCA_FQ_PIE_TARGET,
+ ((u32)PSCHED_TICKS2NS(q->p_params.target)) /
+ NSEC_PER_USEC) ||
+ nla_put_u32(skb, TCA_FQ_PIE_TUPDATE,
+ jiffies_to_usecs(q->p_params.tupdate)) ||
+ nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) ||
+ nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) ||
+ nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) ||
+ nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) ||
+ nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) ||
+ nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) ||
+ nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) ||
+ nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
+ q->p_params.dq_rate_estimator))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, opts);
+
+nla_put_failure:
+ nla_nest_cancel(skb, opts);
+ return -EMSGSIZE;
+}
+
+static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+ struct tc_fq_pie_xstats st = {
+ .packets_in = q->stats.packets_in,
+ .overlimit = q->stats.overlimit,
+ .overmemory = q->overmemory,
+ .dropped = q->stats.dropped,
+ .ecn_mark = q->stats.ecn_mark,
+ .new_flow_count = q->new_flow_count,
+ .memory_usage = q->memory_usage,
+ };
+ struct list_head *pos;
+
+ sch_tree_lock(sch);
+ list_for_each(pos, &q->new_flows)
+ st.new_flows_len++;
+
+ list_for_each(pos, &q->old_flows)
+ st.old_flows_len++;
+ sch_tree_unlock(sch);
+
+ return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static void fq_pie_reset(struct Qdisc *sch)
+{
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+ u16 idx;
+
+ INIT_LIST_HEAD(&q->new_flows);
+ INIT_LIST_HEAD(&q->old_flows);
+ for (idx = 0; idx < q->flows_cnt; idx++) {
+ struct fq_pie_flow *flow = q->flows + idx;
+
+ /* Removes all packets from flow */
+ rtnl_kfree_skbs(flow->head, flow->tail);
+ flow->head = NULL;
+
+ INIT_LIST_HEAD(&flow->flowchain);
+ pie_vars_init(&flow->vars);
+ }
+
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+}
+
+static void fq_pie_destroy(struct Qdisc *sch)
+{
+ struct fq_pie_sched_data *q = qdisc_priv(sch);
+
+ tcf_block_put(q->block);
+ del_timer_sync(&q->adapt_timer);
+ kvfree(q->flows);
+}
+
+static struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = {
+ .id = "fq_pie",
+ .priv_size = sizeof(struct fq_pie_sched_data),
+ .enqueue = fq_pie_qdisc_enqueue,
+ .dequeue = fq_pie_qdisc_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .init = fq_pie_init,
+ .destroy = fq_pie_destroy,
+ .reset = fq_pie_reset,
+ .change = fq_pie_change,
+ .dump = fq_pie_dump,
+ .dump_stats = fq_pie_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+static int __init fq_pie_module_init(void)
+{
+ return register_qdisc(&fq_pie_qdisc_ops);
+}
+
+static void __exit fq_pie_module_exit(void)
+{
+ unregister_qdisc(&fq_pie_qdisc_ops);
+}
+
+module_init(fq_pie_module_init);
+module_exit(fq_pie_module_exit);
+
+MODULE_DESCRIPTION("Flow Queue Proportional Integral controller Enhanced (FQ-PIE)");
+MODULE_AUTHOR("Mohit P. Tahiliani");
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index b0b0dc46af61..915bcdb59a9f 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -19,159 +19,76 @@
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
-
-#define QUEUE_THRESHOLD 16384
-#define DQCOUNT_INVALID -1
-#define DTIME_INVALID 0xffffffffffffffff
-#define MAX_PROB 0xffffffffffffffff
-#define PIE_SCALE 8
-
-/* parameters used */
-struct pie_params {
- psched_time_t target; /* user specified target delay in pschedtime */
- u32 tupdate; /* timer frequency (in jiffies) */
- u32 limit; /* number of packets that can be enqueued */
- u32 alpha; /* alpha and beta are between 0 and 32 */
- u32 beta; /* and are used for shift relative to 1 */
- bool ecn; /* true if ecn is enabled */
- bool bytemode; /* to scale drop early prob based on pkt size */
- u8 dq_rate_estimator; /* to calculate delay using Little's law */
-};
-
-/* variables used */
-struct pie_vars {
- u64 prob; /* probability but scaled by u64 limit. */
- psched_time_t burst_time;
- psched_time_t qdelay;
- psched_time_t qdelay_old;
- u64 dq_count; /* measured in bytes */
- psched_time_t dq_tstamp; /* drain rate */
- u64 accu_prob; /* accumulated drop probability */
- u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */
- u32 qlen_old; /* in bytes */
- u8 accu_prob_overflows; /* overflows of accu_prob */
-};
-
-/* statistics gathering */
-struct pie_stats {
- u32 packets_in; /* total number of packets enqueued */
- u32 dropped; /* packets dropped due to pie_action */
- u32 overlimit; /* dropped due to lack of space in queue */
- u32 maxq; /* maximum queue size */
- u32 ecn_mark; /* packets marked with ECN */
-};
+#include <net/pie.h>
/* private data for the Qdisc */
struct pie_sched_data {
- struct pie_params params;
struct pie_vars vars;
+ struct pie_params params;
struct pie_stats stats;
struct timer_list adapt_timer;
struct Qdisc *sch;
};
-static void pie_params_init(struct pie_params *params)
+bool pie_drop_early(struct Qdisc *sch, struct pie_params *params,
+ struct pie_vars *vars, u32 qlen, u32 packet_size)
{
- params->alpha = 2;
- params->beta = 20;
- params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC); /* 15 ms */
- params->limit = 1000; /* default of 1000 packets */
- params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */
- params->ecn = false;
- params->bytemode = false;
- params->dq_rate_estimator = false;
-}
-
-/* private skb vars */
-struct pie_skb_cb {
- psched_time_t enqueue_time;
-};
-
-static struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
-{
- qdisc_cb_private_validate(skb, sizeof(struct pie_skb_cb));
- return (struct pie_skb_cb *)qdisc_skb_cb(skb)->data;
-}
-
-static psched_time_t pie_get_enqueue_time(const struct sk_buff *skb)
-{
- return get_pie_cb(skb)->enqueue_time;
-}
-
-static void pie_set_enqueue_time(struct sk_buff *skb)
-{
- get_pie_cb(skb)->enqueue_time = psched_get_time();
-}
-
-static void pie_vars_init(struct pie_vars *vars)
-{
- vars->dq_count = DQCOUNT_INVALID;
- vars->dq_tstamp = DTIME_INVALID;
- vars->accu_prob = 0;
- vars->avg_dq_rate = 0;
- /* default of 150 ms in pschedtime */
- vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC);
- vars->accu_prob_overflows = 0;
-}
-
-static bool drop_early(struct Qdisc *sch, u32 packet_size)
-{
- struct pie_sched_data *q = qdisc_priv(sch);
u64 rnd;
- u64 local_prob = q->vars.prob;
+ u64 local_prob = vars->prob;
u32 mtu = psched_mtu(qdisc_dev(sch));
/* If there is still burst allowance left skip random early drop */
- if (q->vars.burst_time > 0)
+ if (vars->burst_time > 0)
return false;
/* If current delay is less than half of target, and
* if drop prob is low already, disable early_drop
*/
- if ((q->vars.qdelay < q->params.target / 2) &&
- (q->vars.prob < MAX_PROB / 5))
+ if ((vars->qdelay < params->target / 2) &&
+ (vars->prob < MAX_PROB / 5))
return false;
- /* If we have fewer than 2 mtu-sized packets, disable drop_early,
+ /* If we have fewer than 2 mtu-sized packets, disable pie_drop_early,
* similar to min_th in RED
*/
- if (sch->qstats.backlog < 2 * mtu)
+ if (qlen < 2 * mtu)
return false;
/* If bytemode is turned on, use packet size to compute new
* probablity. Smaller packets will have lower drop prob in this case
*/
- if (q->params.bytemode && packet_size <= mtu)
+ if (params->bytemode && packet_size <= mtu)
local_prob = (u64)packet_size * div_u64(local_prob, mtu);
else
- local_prob = q->vars.prob;
+ local_prob = vars->prob;
if (local_prob == 0) {
- q->vars.accu_prob = 0;
- q->vars.accu_prob_overflows = 0;
+ vars->accu_prob = 0;
+ vars->accu_prob_overflows = 0;
}
- if (local_prob > MAX_PROB - q->vars.accu_prob)
- q->vars.accu_prob_overflows++;
+ if (local_prob > MAX_PROB - vars->accu_prob)
+ vars->accu_prob_overflows++;
- q->vars.accu_prob += local_prob;
+ vars->accu_prob += local_prob;
- if (q->vars.accu_prob_overflows == 0 &&
- q->vars.accu_prob < (MAX_PROB / 100) * 85)
+ if (vars->accu_prob_overflows == 0 &&
+ vars->accu_prob < (MAX_PROB / 100) * 85)
return false;
- if (q->vars.accu_prob_overflows == 8 &&
- q->vars.accu_prob >= MAX_PROB / 2)
+ if (vars->accu_prob_overflows == 8 &&
+ vars->accu_prob >= MAX_PROB / 2)
return true;
prandom_bytes(&rnd, 8);
if (rnd < local_prob) {
- q->vars.accu_prob = 0;
- q->vars.accu_prob_overflows = 0;
+ vars->accu_prob = 0;
+ vars->accu_prob_overflows = 0;
return true;
}
return false;
}
+EXPORT_SYMBOL_GPL(pie_drop_early);
static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
@@ -184,7 +101,8 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
goto out;
}
- if (!drop_early(sch, skb->len)) {
+ if (!pie_drop_early(sch, &q->params, &q->vars, sch->qstats.backlog,
+ skb->len)) {
enqueue = true;
} else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) &&
INET_ECN_set_ce(skb)) {
@@ -216,14 +134,14 @@ out:
}
static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
- [TCA_PIE_TARGET] = {.type = NLA_U32},
- [TCA_PIE_LIMIT] = {.type = NLA_U32},
- [TCA_PIE_TUPDATE] = {.type = NLA_U32},
- [TCA_PIE_ALPHA] = {.type = NLA_U32},
- [TCA_PIE_BETA] = {.type = NLA_U32},
- [TCA_PIE_ECN] = {.type = NLA_U32},
- [TCA_PIE_BYTEMODE] = {.type = NLA_U32},
- [TCA_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32},
+ [TCA_PIE_TARGET] = {.type = NLA_U32},
+ [TCA_PIE_LIMIT] = {.type = NLA_U32},
+ [TCA_PIE_TUPDATE] = {.type = NLA_U32},
+ [TCA_PIE_ALPHA] = {.type = NLA_U32},
+ [TCA_PIE_BETA] = {.type = NLA_U32},
+ [TCA_PIE_ECN] = {.type = NLA_U32},
+ [TCA_PIE_BYTEMODE] = {.type = NLA_U32},
+ [TCA_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32},
};
static int pie_change(struct Qdisc *sch, struct nlattr *opt,
@@ -296,26 +214,25 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
return 0;
}
-static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
+void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
+ struct pie_vars *vars, u32 qlen)
{
- struct pie_sched_data *q = qdisc_priv(sch);
- int qlen = sch->qstats.backlog; /* current queue size in bytes */
psched_time_t now = psched_get_time();
u32 dtime = 0;
/* If dq_rate_estimator is disabled, calculate qdelay using the
* packet timestamp.
*/
- if (!q->params.dq_rate_estimator) {
- q->vars.qdelay = now - pie_get_enqueue_time(skb);
+ if (!params->dq_rate_estimator) {
+ vars->qdelay = now - pie_get_enqueue_time(skb);
- if (q->vars.dq_tstamp != DTIME_INVALID)
- dtime = now - q->vars.dq_tstamp;
+ if (vars->dq_tstamp != DTIME_INVALID)
+ dtime = now - vars->dq_tstamp;
- q->vars.dq_tstamp = now;
+ vars->dq_tstamp = now;
if (qlen == 0)
- q->vars.qdelay = 0;
+ vars->qdelay = 0;
if (dtime == 0)
return;
@@ -327,39 +244,39 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
* we have enough packets to calculate the drain rate. Save
* current time as dq_tstamp and start measurement cycle.
*/
- if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) {
- q->vars.dq_tstamp = psched_get_time();
- q->vars.dq_count = 0;
+ if (qlen >= QUEUE_THRESHOLD && vars->dq_count == DQCOUNT_INVALID) {
+ vars->dq_tstamp = psched_get_time();
+ vars->dq_count = 0;
}
- /* Calculate the average drain rate from this value. If queue length
- * has receded to a small value viz., <= QUEUE_THRESHOLD bytes,reset
+ /* Calculate the average drain rate from this value. If queue length
+ * has receded to a small value viz., <= QUEUE_THRESHOLD bytes, reset
* the dq_count to -1 as we don't have enough packets to calculate the
- * drain rate anymore The following if block is entered only when we
+ * drain rate anymore. The following if block is entered only when we
* have a substantial queue built up (QUEUE_THRESHOLD bytes or more)
* and we calculate the drain rate for the threshold here. dq_count is
* in bytes, time difference in psched_time, hence rate is in
* bytes/psched_time.
*/
- if (q->vars.dq_count != DQCOUNT_INVALID) {
- q->vars.dq_count += skb->len;
+ if (vars->dq_count != DQCOUNT_INVALID) {
+ vars->dq_count += skb->len;
- if (q->vars.dq_count >= QUEUE_THRESHOLD) {
- u32 count = q->vars.dq_count << PIE_SCALE;
+ if (vars->dq_count >= QUEUE_THRESHOLD) {
+ u32 count = vars->dq_count << PIE_SCALE;
- dtime = now - q->vars.dq_tstamp;
+ dtime = now - vars->dq_tstamp;
if (dtime == 0)
return;
count = count / dtime;
- if (q->vars.avg_dq_rate == 0)
- q->vars.avg_dq_rate = count;
+ if (vars->avg_dq_rate == 0)
+ vars->avg_dq_rate = count;
else
- q->vars.avg_dq_rate =
- (q->vars.avg_dq_rate -
- (q->vars.avg_dq_rate >> 3)) + (count >> 3);
+ vars->avg_dq_rate =
+ (vars->avg_dq_rate -
+ (vars->avg_dq_rate >> 3)) + (count >> 3);
/* If the queue has receded below the threshold, we hold
* on to the last drain rate calculated, else we reset
@@ -367,10 +284,10 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
* packet is dequeued
*/
if (qlen < QUEUE_THRESHOLD) {
- q->vars.dq_count = DQCOUNT_INVALID;
+ vars->dq_count = DQCOUNT_INVALID;
} else {
- q->vars.dq_count = 0;
- q->vars.dq_tstamp = psched_get_time();
+ vars->dq_count = 0;
+ vars->dq_tstamp = psched_get_time();
}
goto burst_allowance_reduction;
@@ -380,18 +297,18 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
return;
burst_allowance_reduction:
- if (q->vars.burst_time > 0) {
- if (q->vars.burst_time > dtime)
- q->vars.burst_time -= dtime;
+ if (vars->burst_time > 0) {
+ if (vars->burst_time > dtime)
+ vars->burst_time -= dtime;
else
- q->vars.burst_time = 0;
+ vars->burst_time = 0;
}
}
+EXPORT_SYMBOL_GPL(pie_process_dequeue);
-static void calculate_probability(struct Qdisc *sch)
+void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
+ u32 qlen)
{
- struct pie_sched_data *q = qdisc_priv(sch);
- u32 qlen = sch->qstats.backlog; /* queue size in bytes */
psched_time_t qdelay = 0; /* in pschedtime */
psched_time_t qdelay_old = 0; /* in pschedtime */
s64 delta = 0; /* determines the change in probability */
@@ -400,21 +317,21 @@ static void calculate_probability(struct Qdisc *sch)
u32 power;
bool update_prob = true;
- if (q->params.dq_rate_estimator) {
- qdelay_old = q->vars.qdelay;
- q->vars.qdelay_old = q->vars.qdelay;
+ if (params->dq_rate_estimator) {
+ qdelay_old = vars->qdelay;
+ vars->qdelay_old = vars->qdelay;
- if (q->vars.avg_dq_rate > 0)
- qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
+ if (vars->avg_dq_rate > 0)
+ qdelay = (qlen << PIE_SCALE) / vars->avg_dq_rate;
else
qdelay = 0;
} else {
- qdelay = q->vars.qdelay;
- qdelay_old = q->vars.qdelay_old;
+ qdelay = vars->qdelay;
+ qdelay_old = vars->qdelay_old;
}
- /* If qdelay is zero and qlen is not, it means qlen is very small, less
- * than dequeue_rate, so we do not update probabilty in this round
+ /* If qdelay is zero and qlen is not, it means qlen is very small,
+ * so we do not update probabilty in this round.
*/
if (qdelay == 0 && qlen != 0)
update_prob = false;
@@ -426,18 +343,18 @@ static void calculate_probability(struct Qdisc *sch)
* probability. alpha/beta are updated locally below by scaling down
* by 16 to come to 0-2 range.
*/
- alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
- beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ alpha = ((u64)params->alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ beta = ((u64)params->beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
/* We scale alpha and beta differently depending on how heavy the
* congestion is. Please see RFC 8033 for details.
*/
- if (q->vars.prob < MAX_PROB / 10) {
+ if (vars->prob < MAX_PROB / 10) {
alpha >>= 1;
beta >>= 1;
power = 100;
- while (q->vars.prob < div_u64(MAX_PROB, power) &&
+ while (vars->prob < div_u64(MAX_PROB, power) &&
power <= 1000000) {
alpha >>= 2;
beta >>= 2;
@@ -446,14 +363,14 @@ static void calculate_probability(struct Qdisc *sch)
}
/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
- delta += alpha * (u64)(qdelay - q->params.target);
+ delta += alpha * (u64)(qdelay - params->target);
delta += beta * (u64)(qdelay - qdelay_old);
- oldprob = q->vars.prob;
+ oldprob = vars->prob;
/* to ensure we increase probability in steps of no more than 2% */
if (delta > (s64)(MAX_PROB / (100 / 2)) &&
- q->vars.prob >= MAX_PROB / 10)
+ vars->prob >= MAX_PROB / 10)
delta = (MAX_PROB / 100) * 2;
/* Non-linear drop:
@@ -464,12 +381,12 @@ static void calculate_probability(struct Qdisc *sch)
if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC)))
delta += MAX_PROB / (100 / 2);
- q->vars.prob += delta;
+ vars->prob += delta;
if (delta > 0) {
/* prevent overflow */
- if (q->vars.prob < oldprob) {
- q->vars.prob = MAX_PROB;
+ if (vars->prob < oldprob) {
+ vars->prob = MAX_PROB;
/* Prevent normalization error. If probability is at
* maximum value already, we normalize it here, and
* skip the check to do a non-linear drop in the next
@@ -479,8 +396,8 @@ static void calculate_probability(struct Qdisc *sch)
}
} else {
/* prevent underflow */
- if (q->vars.prob > oldprob)
- q->vars.prob = 0;
+ if (vars->prob > oldprob)
+ vars->prob = 0;
}
/* Non-linear drop in probability: Reduce drop probability quickly if
@@ -489,10 +406,10 @@ static void calculate_probability(struct Qdisc *sch)
if (qdelay == 0 && qdelay_old == 0 && update_prob)
/* Reduce drop probability to 98.4% */
- q->vars.prob -= q->vars.prob / 64u;
+ vars->prob -= vars->prob / 64;
- q->vars.qdelay = qdelay;
- q->vars.qlen_old = qlen;
+ vars->qdelay = qdelay;
+ vars->qlen_old = qlen;
/* We restart the measurement cycle if the following conditions are met
* 1. If the delay has been low for 2 consecutive Tupdate periods
@@ -500,16 +417,17 @@ static void calculate_probability(struct Qdisc *sch)
* 3. If average dq_rate_estimator is enabled, we have atleast one
* estimate for the avg_dq_rate ie., is a non-zero value
*/
- if ((q->vars.qdelay < q->params.target / 2) &&
- (q->vars.qdelay_old < q->params.target / 2) &&
- q->vars.prob == 0 &&
- (!q->params.dq_rate_estimator || q->vars.avg_dq_rate > 0)) {
- pie_vars_init(&q->vars);
+ if ((vars->qdelay < params->target / 2) &&
+ (vars->qdelay_old < params->target / 2) &&
+ vars->prob == 0 &&
+ (!params->dq_rate_estimator || vars->avg_dq_rate > 0)) {
+ pie_vars_init(vars);
}
- if (!q->params.dq_rate_estimator)
- q->vars.qdelay_old = qdelay;
+ if (!params->dq_rate_estimator)
+ vars->qdelay_old = qdelay;
}
+EXPORT_SYMBOL_GPL(pie_calculate_probability);
static void pie_timer(struct timer_list *t)
{
@@ -518,7 +436,7 @@ static void pie_timer(struct timer_list *t)
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
- calculate_probability(sch);
+ pie_calculate_probability(&q->params, &q->vars, sch->qstats.backlog);
/* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */
if (q->params.tupdate)
@@ -607,12 +525,13 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
{
+ struct pie_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb = qdisc_dequeue_head(sch);
if (!skb)
return NULL;
- pie_process_dequeue(sch, skb);
+ pie_process_dequeue(skb, &q->params, &q->vars, sch->qstats.backlog);
return skb;
}
@@ -633,7 +552,7 @@ static void pie_destroy(struct Qdisc *sch)
}
static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
- .id = "pie",
+ .id = "pie",
.priv_size = sizeof(struct pie_sched_data),
.enqueue = pie_qdisc_enqueue,
.dequeue = pie_qdisc_dequeue,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 18b884cfdfe8..647941702f9f 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -292,8 +292,14 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct tc_prio_qopt_offload graft_offload;
unsigned long band = arg - 1;
- if (new == NULL)
- new = &noop_qdisc;
+ if (!new) {
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, arg), extack);
+ if (!new)
+ new = &noop_qdisc;
+ else
+ qdisc_hash_add(new, true);
+ }
*old = qdisc_replace(sch, new, &q->queues[band]);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c609373c8661..660fc45ee40f 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(taprio_list_lock);
#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
+#define TAPRIO_FLAGS_INVALID U32_MAX
struct sched_entry {
struct list_head list;
@@ -766,6 +767,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
};
static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
@@ -1367,6 +1369,33 @@ static int taprio_mqprio_cmp(const struct net_device *dev,
return 0;
}
+/* The semantics of the 'flags' argument in relation to 'change()'
+ * requests, are interpreted following two rules (which are applied in
+ * this order): (1) an omitted 'flags' argument is interpreted as
+ * zero; (2) the 'flags' of a "running" taprio instance cannot be
+ * changed.
+ */
+static int taprio_new_flags(const struct nlattr *attr, u32 old,
+ struct netlink_ext_ack *extack)
+{
+ u32 new = 0;
+
+ if (attr)
+ new = nla_get_u32(attr);
+
+ if (old != TAPRIO_FLAGS_INVALID && old != new) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!taprio_flags_valid(new)) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
+ return -EINVAL;
+ }
+
+ return new;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1375,7 +1404,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
- u32 taprio_flags = 0;
unsigned long flags;
ktime_t start;
int i, err;
@@ -1388,21 +1416,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
- if (tb[TCA_TAPRIO_ATTR_FLAGS]) {
- taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]);
-
- if (q->flags != 0 && q->flags != taprio_flags) {
- NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
- return -EOPNOTSUPP;
- } else if (!taprio_flags_valid(taprio_flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
- return -EINVAL;
- }
+ err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
+ q->flags, extack);
+ if (err < 0)
+ return err;
- q->flags = taprio_flags;
- }
+ q->flags = err;
- err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags);
+ err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
return err;
@@ -1444,7 +1465,20 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
taprio_set_picos_per_byte(dev, q);
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+ if (mqprio) {
+ netdev_set_num_tc(dev, mqprio->num_tc);
+ for (i = 0; i < mqprio->num_tc; i++)
+ netdev_set_tc_queue(dev, i,
+ mqprio->count[i],
+ mqprio->offset[i]);
+
+ /* Always use supplied priority mappings */
+ for (i = 0; i <= TC_BITMASK; i++)
+ netdev_set_prio_tc_map(dev, i,
+ mqprio->prio_tc_map[i]);
+ }
+
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags))
err = taprio_enable_offload(dev, mqprio, q, new_admin, extack);
else
err = taprio_disable_offload(dev, q, extack);
@@ -1464,27 +1498,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
}
- if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
- !FULL_OFFLOAD_IS_ENABLED(taprio_flags) &&
+ if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
+ !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
!hrtimer_active(&q->advance_timer)) {
hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
}
- if (mqprio) {
- netdev_set_num_tc(dev, mqprio->num_tc);
- for (i = 0; i < mqprio->num_tc; i++)
- netdev_set_tc_queue(dev, i,
- mqprio->count[i],
- mqprio->offset[i]);
-
- /* Always use supplied priority mappings */
- for (i = 0; i <= TC_BITMASK; i++)
- netdev_set_prio_tc_map(dev, i,
- mqprio->prio_tc_map[i]);
- }
-
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) {
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
q->dequeue = taprio_dequeue_offload;
q->peek = taprio_peek_offload;
} else {
@@ -1501,9 +1522,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}
- if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) {
- setup_txtime(q, new_admin, start);
+ setup_txtime(q, new_admin, start);
+ if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
if (!oper) {
rcu_assign_pointer(q->oper_sched, new_admin);
err = 0;
@@ -1528,7 +1549,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
spin_unlock_irqrestore(&q->current_entry_lock, flags);
- if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags))
taprio_offload_config_changed(q);
}
@@ -1567,7 +1588,7 @@ static void taprio_destroy(struct Qdisc *sch)
}
q->qdiscs = NULL;
- netdev_set_num_tc(dev, 0);
+ netdev_reset_tc(dev);
if (q->oper_sched)
call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
@@ -1597,6 +1618,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
* and get the valid one on taprio_change().
*/
q->clockid = -1;
+ q->flags = TAPRIO_FLAGS_INVALID;
spin_lock(&taprio_list_lock);
list_add(&q->taprio_list, &taprio_list);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 5f72f3f916a5..78e79029dc63 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -15,6 +15,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
@@ -137,6 +138,52 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r,
return len;
}
+static void tbf_offload_change(struct Qdisc *sch)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_tbf_qopt_offload qopt;
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return;
+
+ qopt.command = TC_TBF_REPLACE;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ qopt.replace_params.rate = q->rate;
+ qopt.replace_params.max_size = q->max_size;
+ qopt.replace_params.qstats = &sch->qstats;
+
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
+}
+
+static void tbf_offload_destroy(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_tbf_qopt_offload qopt;
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return;
+
+ qopt.command = TC_TBF_DESTROY;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
+}
+
+static int tbf_offload_dump(struct Qdisc *sch)
+{
+ struct tc_tbf_qopt_offload qopt;
+
+ qopt.command = TC_TBF_STATS;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ qopt.stats.bstats = &sch->bstats;
+ qopt.stats.qstats = &sch->qstats;
+
+ return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
+}
+
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
@@ -155,8 +202,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_drop(skb, sch, to_free);
nb = 0;
- while (segs) {
- nskb = segs->next;
+ skb_list_walk_safe(segs, segs, nskb) {
skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
len += segs->len;
@@ -167,7 +213,6 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
} else {
nb++;
}
- segs = nskb;
}
sch->q.qlen += nb;
if (nb > 1)
@@ -409,6 +454,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_unlock(sch);
err = 0;
+
+ tbf_offload_change(sch);
done:
return err;
}
@@ -434,6 +481,7 @@ static void tbf_destroy(struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_cancel(&q->watchdog);
+ tbf_offload_destroy(sch);
qdisc_put(q->qdisc);
}
@@ -442,8 +490,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tbf_sched_data *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_tbf_qopt opt;
+ int err;
+
+ err = tbf_offload_dump(sch);
+ if (err)
+ return err;
- sch->qstats.backlog = q->qdisc->qstats.backlog;
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;