summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-12-28 23:49:40 +0300
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-12-28 23:49:40 +0300
commit 0191b625ca5a46206d2fb862bb08f36f2fcb3b31 (patch)
tree 454d1842b1833d976da62abcbd5c47521ebe9bd7 /net/sched
parent 54a696bd07c14d3b1192d03ce7269bc59b45209a (diff)
parent eb56092fc168bf5af199d47af50c0d84a96db898 (diff)
download linux-0191b625ca5a46206d2fb862bb08f36f2fcb3b31.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1429 commits) net: Allow dependancies of FDDI & Tokenring to be modular. igb: Fix build warning when DCA is disabled. net: Fix warning fallout from recent NAPI interface changes. gro: Fix potential use after free sfc: If AN is enabled, always read speed/duplex from the AN advertising bits sfc: When disabling the NIC, close the device rather than unregistering it sfc: SFT9001: Add cable diagnostics sfc: Add support for multiple PHY self-tests sfc: Merge top-level functions for self-tests sfc: Clean up PHY mode management in loopback self-test sfc: Fix unreliable link detection in some loopback modes sfc: Generate unique names for per-NIC workqueues 802.3ad: use standard ethhdr instead of ad_header 802.3ad: generalize out mac address initializer 802.3ad: initialize ports LACPDU from const initializer 802.3ad: remove typedef around ad_system 802.3ad: turn ports is_individual into a bool 802.3ad: turn ports is_enabled into a bool 802.3ad: make ntt bool ixgbe: Fix set_ringparam in ixgbe to use the same memory pools. ... Fixed trivial IPv4/6 address printing conflicts in fs/cifs/connect.c due to the conversion to %pI (in this networking merge) and the addition of doing IPv6 addresses (from the earlier merge of CIFS).
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig22
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_api.c18
-rw-r--r--net/sched/act_gact.c4
-rw-r--r--net/sched/act_ipt.c4
-rw-r--r--net/sched/act_mirred.c4
-rw-r--r--net/sched/act_nat.c4
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c33
-rw-r--r--net/sched/act_simple.c4
-rw-r--r--net/sched/act_skbedit.c4
-rw-r--r--net/sched/cls_api.c3
-rw-r--r--net/sched/cls_basic.c2
-rw-r--r--net/sched/cls_cgroup.c288
-rw-r--r--net/sched/cls_fw.c2
-rw-r--r--net/sched/cls_route.c2
-rw-r--r--net/sched/cls_tcindex.c6
-rw-r--r--net/sched/cls_u32.c11
-rw-r--r--net/sched/ematch.c18
-rw-r--r--net/sched/sch_api.c50
-rw-r--r--net/sched/sch_atm.c36
-rw-r--r--net/sched/sch_blackhole.c1
-rw-r--r--net/sched/sch_cbq.c76
-rw-r--r--net/sched/sch_drr.c519
-rw-r--r--net/sched/sch_dsmark.c22
-rw-r--r--net/sched/sch_fifo.c4
-rw-r--r--net/sched/sch_generic.c40
-rw-r--r--net/sched/sch_gred.c22
-rw-r--r--net/sched/sch_hfsc.c64
-rw-r--r--net/sched/sch_htb.c171
-rw-r--r--net/sched/sch_multiq.c82
-rw-r--r--net/sched/sch_netem.c160
-rw-r--r--net/sched/sch_prio.c50
-rw-r--r--net/sched/sch_red.c33
-rw-r--r--net/sched/sch_sfq.c71
-rw-r--r--net/sched/sch_tbf.c44
-rw-r--r--net/sched/sch_teql.c19
37 files changed, 1216 insertions, 683 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 6767e54155db..4f7ef0db302b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -194,6 +194,17 @@ config NET_SCH_NETEM
If unsure, say N.
+config NET_SCH_DRR
+ tristate "Deficit Round Robin scheduler (DRR)"
+ help
+ Say Y here if you want to use the Deficit Round Robin (DRR) packet
+ scheduling algorithm.
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_drr.
+
+ If unsure, say N.
+
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
@@ -316,6 +327,17 @@ config NET_CLS_FLOW
To compile this code as a module, choose M here: the
module will be called cls_flow.
+config NET_CLS_CGROUP
+ bool "Control Group Classifier"
+ select NET_CLS
+ depends on CGROUPS
+ ---help---
+ Say Y here if you want to classify packets based on the control
+ cgroup of their process.
+
+ This is a bool option: the classifier can only be built into
+ the kernel (Y); it cannot be compiled as a module.
+
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e60c9925b269..54d950cd4b8d 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
+obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
@@ -38,6 +39,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
+obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8f457f1e0acf..9d03cc33b6cc 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -214,12 +214,14 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
}
EXPORT_SYMBOL(tcf_hash_check);
-struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
+struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
+ struct tc_action *a, int size, int bind,
+ u32 *idx_gen, struct tcf_hashinfo *hinfo)
{
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
if (unlikely(!p))
- return p;
+ return ERR_PTR(-ENOMEM);
p->tcfc_refcnt = 1;
if (bind)
p->tcfc_bindcnt = 1;
@@ -228,9 +230,15 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_acti
p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
- if (est)
- gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
- &p->tcfc_lock, est);
+ if (est) {
+ int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
+ &p->tcfc_lock, est);
+ if (err) {
+ kfree(p);
+ return ERR_PTR(err);
+ }
+ }
+
a->priv = (void *) p;
return p;
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ac04289da5d7..e7f796aec657 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -88,8 +88,8 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
bind, &gact_idx_gen, &gact_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
if (!ovr) {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0453d79ebf57..082c520b0def 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -136,8 +136,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
&ipt_idx_gen, &ipt_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
if (!ovr) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 70341c020b6d..b9aaab4e0354 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -105,8 +105,8 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
return -EINVAL;
pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
&mirred_idx_gen, &mirred_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
ret = ACT_P_CREATED;
} else {
if (!ovr) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7b39ed485bca..d885ba311564 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -68,8 +68,8 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
&nat_idx_gen, &nat_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
p = to_tcf_nat(pc);
ret = ACT_P_CREATED;
} else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index d5f4e3404864..96c0ed115e2a 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -68,8 +68,8 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
return -EINVAL;
pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
&pedit_idx_gen, &pedit_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
p = to_pedit(pc);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 38015b493947..5c72a116b1a4 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -182,17 +182,32 @@ override:
R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
if (R_tab == NULL)
goto failure;
+
+ if (!est && (ret == ACT_P_CREATED ||
+ !gen_estimator_active(&police->tcf_bstats,
+ &police->tcf_rate_est))) {
+ err = -EINVAL;
+ goto failure;
+ }
+
if (parm->peakrate.rate) {
P_tab = qdisc_get_rtab(&parm->peakrate,
tb[TCA_POLICE_PEAKRATE]);
- if (P_tab == NULL) {
- qdisc_put_rtab(R_tab);
+ if (P_tab == NULL)
goto failure;
- }
}
}
- /* No failure allowed after this point */
+
spin_lock_bh(&police->tcf_lock);
+ if (est) {
+ err = gen_replace_estimator(&police->tcf_bstats,
+ &police->tcf_rate_est,
+ &police->tcf_lock, est);
+ if (err)
+ goto failure_unlock;
+ }
+
+ /* No failure allowed after this point */
if (R_tab != NULL) {
qdisc_put_rtab(police->tcfp_R_tab);
police->tcfp_R_tab = R_tab;
@@ -217,10 +232,6 @@ override:
if (tb[TCA_POLICE_AVRATE])
police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
- if (est)
- gen_replace_estimator(&police->tcf_bstats,
- &police->tcf_rate_est,
- &police->tcf_lock, est);
spin_unlock_bh(&police->tcf_lock);
if (ret != ACT_P_CREATED)
@@ -238,7 +249,13 @@ override:
a->priv = police;
return ret;
+failure_unlock:
+ spin_unlock_bh(&police->tcf_lock);
failure:
+ if (P_tab)
+ qdisc_put_rtab(P_tab);
+ if (R_tab)
+ qdisc_put_rtab(R_tab);
if (ret == ACT_P_CREATED)
kfree(police);
return err;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e7851ce92cfe..8daa1ebc7413 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -124,8 +124,8 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
&simp_idx_gen, &simp_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
d = to_defact(pc);
ret = alloc_defdata(d, defdata);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fe9777e77f35..4ab916b8074b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -104,8 +104,8 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
if (!pc) {
pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
&skbedit_idx_gen, &skbedit_hash_info);
- if (unlikely(!pc))
- return -ENOMEM;
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
d = to_skbedit(pc);
ret = ACT_P_CREATED;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 16e7ac9774e5..173fcc4b050d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -531,7 +531,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
if (src->action) {
struct tc_action *act;
tcf_tree_lock(tp);
- act = xchg(&dst->action, src->action);
+ act = dst->action;
+ dst->action = src->action;
tcf_tree_unlock(tp);
if (act)
tcf_action_destroy(act, TCA_ACT_UNBIND);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 956915c217d6..4e2bda854119 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -102,7 +102,7 @@ static inline void basic_delete_filter(struct tcf_proto *tp,
static void basic_destroy(struct tcf_proto *tp)
{
- struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL);
+ struct basic_head *head = tp->root;
struct basic_filter *f, *n;
list_for_each_entry_safe(f, n, &head->flist, link) {
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 000000000000..0d68b1975983
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,288 @@
+/*
+ * net/sched/cls_cgroup.c Control Group Classifier
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+
+struct cgroup_cls_state /* per-cgroup state of the net_cls subsystem */
+{
+ struct cgroup_subsys_state css; /* generic subsystem state; cgrp_create() hands &css to the cgroup core */
+ u32 classid; /* class id copied into tcf_result by cls_cgroup_classify(); 0 means "do not match" */
+};
+
+static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp)
+{ /* Fetch cgrp's net_cls subsystem state and view it as a cgroup_cls_state. */
+ return (struct cgroup_cls_state *)
+ cgroup_subsys_state(cgrp, net_cls_subsys_id);
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+ struct cgroup *cgrp)
+{ /* Allocate zeroed state for cgrp; returns its css, or ERR_PTR(-ENOMEM) on allocation failure. */
+ struct cgroup_cls_state *cs;
+
+ if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+ if (cgrp->parent) /* a cgroup with a parent starts out with the parent's classid */
+ cs->classid = net_cls_state(cgrp->parent)->classid;
+
+ return &cs->css;
+}
+
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{ /* Release the per-cgroup state that cgrp_create() allocated for cgrp. */
+	kfree(net_cls_state(cgrp)); /* free this cgroup's state; 'ss' is the shared subsystem descriptor and must not be freed */
+}
+
+static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
+{ /* read_u64 handler of the "classid" file: returns cgrp's current classid. */
+ return net_cls_state(cgrp)->classid;
+}
+
+static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
+{ /* write_u64 handler of the "classid" file: returns 0, or -ENODEV if the group could not be locked. */
+ if (!cgroup_lock_live_group(cgrp)) /* lock not obtained: do not touch the state */
+ return -ENODEV;
+
+ net_cls_state(cgrp)->classid = (u32) value; /* the u64 input is truncated to 32 bits */
+
+ cgroup_unlock();
+
+ return 0;
+}
+
+static struct cftype ss_files[] = { /* control files registered for each cgroup by cgrp_populate() */
+ {
+ .name = "classid",
+ .read_u64 = read_classid,
+ .write_u64 = write_classid,
+ },
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{ /* Register every entry of ss_files for cgrp; passes through cgroup_add_files()'s result. */
+ return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+struct cgroup_subsys net_cls_subsys = { /* "net_cls" cgroup subsystem descriptor (non-static, so visible outside this file) */
+ .name = "net_cls",
+ .create = cgrp_create,
+ .destroy = cgrp_destroy,
+ .populate = cgrp_populate,
+ .subsys_id = net_cls_subsys_id,
+};
+
+struct cls_cgroup_head /* the one filter instance of a cgroup classifier, stored in tp->root */
+{
+ u32 handle; /* handle given when the head was created; cls_cgroup_change() rejects any other */
+ struct tcf_exts exts; /* extensions executed by cls_cgroup_classify() on a match */
+ struct tcf_ematch_tree ematches; /* ematch tree that must also match in cls_cgroup_classify() */
+};
+
+static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{ /* Classify skb by the classid of current's cgroup; returns -1 when there is no match. */
+ struct cls_cgroup_head *head = tp->root;
+ struct cgroup_cls_state *cs;
+ int ret = 0;
+
+ /*
+ * Due to the nature of the classifier it is required to ignore all
+ * packets originating from softirq context as accessing `current'
+ * would lead to false results.
+ *
+ * This test assumes that all callers of dev_queue_xmit() explicitly
+ * disable bh. Knowing this, it is possible to detect softirq based
+ * calls by looking at the number of nested bh disable calls because
+ * softirqs always disable bh.
+ */
+ if (softirq_count() != SOFTIRQ_OFFSET)
+ return -1;
+
+ rcu_read_lock();
+ cs = (struct cgroup_cls_state *) task_subsys_state(current,
+ net_cls_subsys_id);
+ if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { /* a classid of 0 never matches */
+ res->classid = cs->classid;
+ res->class = 0;
+ ret = tcf_exts_exec(skb, &head->exts, res); /* the extensions' result becomes the verdict */
+ } else
+ ret = -1;
+
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
+{ /* .get hook: ignores the handle and always returns 0. */
+ return 0UL;
+}
+
+static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f)
+{ /* .put hook: intentionally empty, cls_cgroup_get() hands out nothing to release. */
+}
+
+static int cls_cgroup_init(struct tcf_proto *tp)
+{ /* .init hook: nothing to set up here; the head is allocated in cls_cgroup_change(). */
+ return 0;
+}
+
+static const struct tcf_ext_map cgroup_ext_map = { /* netlink attribute types used for this classifier's extensions */
+ .action = TCA_CGROUP_ACT,
+ .police = TCA_CGROUP_POLICE,
+};
+
+static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { /* policy passed to nla_parse_nested() in cls_cgroup_change() */
+ [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
+};
+
+/*
+ * Create or update the single filter instance (head) of @tp.
+ *
+ * The first call allocates the head, which requires a non-zero @handle, and
+ * publishes it as tp->root; later calls must pass that same handle.  The
+ * nested TCA_OPTIONS attribute is mandatory.  The extensions and the ematch
+ * tree parsed from it replace the installed ones only after both validated.
+ *
+ * Returns 0 on success, -EINVAL for a missing TCA_OPTIONS or a zero handle
+ * on creation, -ENOBUFS if the head cannot be allocated, -ENOENT on a handle
+ * mismatch, or the error reported by attribute parsing/validation.
+ */
+static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
+			     u32 handle, struct nlattr **tca,
+			     unsigned long *arg)
+{
+	struct nlattr *tb[TCA_CGROUP_MAX+1];
+	struct cls_cgroup_head *head = tp->root;
+	struct tcf_ematch_tree t;
+	struct tcf_exts e;
+	int err;
+
+	/* nla_parse_nested() reads the attribute header, so it must exist */
+	if (tca[TCA_OPTIONS] == NULL)
+		return -EINVAL;
+
+	if (head == NULL) {
+		if (!handle)
+			return -EINVAL;
+
+		head = kzalloc(sizeof(*head), GFP_KERNEL);
+		if (head == NULL)
+			return -ENOBUFS;
+
+		head->handle = handle;
+
+		tcf_tree_lock(tp);
+		tp->root = head;
+		tcf_tree_unlock(tp);
+	}
+
+	if (handle != head->handle)
+		return -ENOENT;
+
+	err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
+			       cgroup_policy);
+	if (err < 0)
+		return err;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
+	if (err < 0) {
+		/* 'e' was validated but never installed: release it here */
+		tcf_exts_destroy(tp, &e);
+		return err;
+	}
+
+	tcf_exts_change(tp, &head->exts, &e);
+	tcf_em_tree_change(tp, &head->ematches, &t);
+
+	return 0;
+}
+
+static void cls_cgroup_destroy(struct tcf_proto *tp)
+{ /* .destroy hook: tear down the head's extensions and ematch tree, then free the head. */
+ struct cls_cgroup_head *head = tp->root;
+
+ if (head) { /* tp->root is still NULL if cls_cgroup_change() never allocated a head */
+ tcf_exts_destroy(tp, &head->exts);
+ tcf_em_tree_destroy(tp, &head->ematches);
+ kfree(head);
+ }
+}
+
+static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+{ /* .delete hook: deleting the single filter instance is not supported. */
+ return -EOPNOTSUPP;
+}
+
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{ /* .walk hook: present the single filter instance (the head) to the walker callback. */
+ struct cls_cgroup_head *head = tp->root;
+
+ if (arg->count < arg->skip) /* still inside the skip window: only count the entry */
+ goto skip;
+
+ if (arg->fn(tp, (unsigned long) head, arg) < 0) { /* callback returned an error: flag the walk as stopped */
+ arg->stop = 1;
+ return;
+ }
+skip:
+ arg->count++;
+}
+
+static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{ /* .dump hook: write handle, extensions, ematches and stats into skb; returns skb->len or -1. */
+ struct cls_cgroup_head *head = tp->root;
+ unsigned char *b = skb_tail_pointer(skb); /* tail position at entry, used to roll back on failure */
+ struct nlattr *nest;
+
+ t->tcm_handle = head->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
+ tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0) /* stats are emitted after the TCA_OPTIONS nest is closed */
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b); /* trim skb back to the position saved in b */
+ return -1;
+}
+
+static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { /* operations table registered under the kind "cgroup" */
+ .kind = "cgroup",
+ .init = cls_cgroup_init,
+ .change = cls_cgroup_change,
+ .classify = cls_cgroup_classify,
+ .destroy = cls_cgroup_destroy,
+ .get = cls_cgroup_get,
+ .put = cls_cgroup_put,
+ .delete = cls_cgroup_delete,
+ .walk = cls_cgroup_walk,
+ .dump = cls_cgroup_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init init_cgroup_cls(void)
+{ /* Init entry point: register cls_cgroup_ops; returns register_tcf_proto_ops()'s result. */
+ return register_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+static void __exit exit_cgroup_cls(void)
+{ /* Exit entry point: unregister the ops registered by init_cgroup_cls(). */
+ unregister_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+module_init(init_cgroup_cls);
+module_exit(exit_cgroup_cls);
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index b0f90e593af0..6d6e87585fb1 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -148,7 +148,7 @@ fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
static void fw_destroy(struct tcf_proto *tp)
{
- struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL);
+ struct fw_head *head = tp->root;
struct fw_filter *f;
int h;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index e3d8455eebc2..bdf1f4172eef 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -260,7 +260,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
static void route4_destroy(struct tcf_proto *tp)
{
- struct route4_head *head = xchg(&tp->root, NULL);
+ struct route4_head *head = tp->root;
int h1, h2;
if (head == NULL)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7a7bff5ded24..e806f2314b5e 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,12 +13,6 @@
#include <net/netlink.h>
#include <net/pkt_cls.h>
-
-/*
- * Not quite sure if we need all the xchgs Alexey uses when accessing things.
- * Can always add them later ... :)
- */
-
/*
* Passing parameters to the root seems to be done more awkwardly than really
* necessary. At least, u32 doesn't seem to use such dirty hacks. To be
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 246f9065ce34..05d178008cbc 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -387,7 +387,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
static void u32_destroy(struct tcf_proto *tp)
{
struct tc_u_common *tp_c = tp->data;
- struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
+ struct tc_u_hnode *root_ht = tp->root;
WARN_ON(root_ht == NULL);
@@ -479,7 +479,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
err = -EINVAL;
if (tb[TCA_U32_LINK]) {
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
- struct tc_u_hnode *ht_down = NULL;
+ struct tc_u_hnode *ht_down = NULL, *ht_old;
if (TC_U32_KEY(handle))
goto errout;
@@ -493,11 +493,12 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
}
tcf_tree_lock(tp);
- ht_down = xchg(&n->ht_down, ht_down);
+ ht_old = n->ht_down;
+ n->ht_down = ht_down;
tcf_tree_unlock(tp);
- if (ht_down)
- ht_down->refcnt--;
+ if (ht_old)
+ ht_old->refcnt--;
}
if (tb[TCA_U32_CLASSID]) {
n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index e82519e548d7..aab59409728b 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -71,7 +71,7 @@
*
* static void __exit exit_my_ematch(void)
* {
- * return tcf_em_unregister(&my_ops);
+ * tcf_em_unregister(&my_ops);
* }
*
* module_init(init_my_ematch);
@@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register);
*
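- * Returns -ENOENT if no matching ematch was found.
+ * The ops are unlinked unconditionally and nothing is returned, so
+ * they must currently be registered.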
*/
-int tcf_em_unregister(struct tcf_ematch_ops *ops)
+void tcf_em_unregister(struct tcf_ematch_ops *ops)
{
- int err = 0;
- struct tcf_ematch_ops *e;
-
write_lock(&ematch_mod_lock);
- list_for_each_entry(e, &ematch_ops, link) {
- if (e == ops) {
- list_del(&e->link);
- goto out;
- }
- }
-
- err = -ENOENT;
-out:
+ list_del(&ops->link);
write_unlock(&ematch_mod_lock);
- return err;
}
EXPORT_SYMBOL(tcf_em_unregister);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6ab4a2f92ca0..0fc4a18fd96f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -97,10 +97,9 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
Auxiliary routines:
- ---requeue
+ ---peek
- requeues once dequeued packet. It is used for non-standard or
- just buggy devices, which can defer output even if netif_queue_stopped()=0.
+ like dequeue but without removing a packet from the queue
---reset
@@ -147,8 +146,14 @@ int register_qdisc(struct Qdisc_ops *qops)
if (qops->enqueue == NULL)
qops->enqueue = noop_qdisc_ops.enqueue;
- if (qops->requeue == NULL)
- qops->requeue = noop_qdisc_ops.requeue;
+ if (qops->peek == NULL) {
+ if (qops->dequeue == NULL) {
+ qops->peek = noop_qdisc_ops.peek;
+ } else {
+ rc = -EINVAL;
+ goto out;
+ }
+ }
if (qops->dequeue == NULL)
qops->dequeue = noop_qdisc_ops.dequeue;
@@ -184,7 +189,7 @@ EXPORT_SYMBOL(unregister_qdisc);
(root qdisc, all its children, children of children etc.)
*/
-struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
+static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
struct Qdisc *q;
@@ -199,28 +204,16 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
return NULL;
}
-/*
- * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
- * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
- */
-static DEFINE_SPINLOCK(qdisc_list_lock);
-
static void qdisc_list_add(struct Qdisc *q)
{
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
- spin_lock_bh(&qdisc_list_lock);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
- spin_unlock_bh(&qdisc_list_lock);
- }
}
void qdisc_list_del(struct Qdisc *q)
{
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
- spin_lock_bh(&qdisc_list_lock);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
list_del(&q->list);
- spin_unlock_bh(&qdisc_list_lock);
- }
}
EXPORT_SYMBOL(qdisc_list_del);
@@ -229,22 +222,17 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
unsigned int i;
struct Qdisc *q;
- spin_lock_bh(&qdisc_list_lock);
-
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
struct Qdisc *txq_root = txq->qdisc_sleeping;
q = qdisc_match_from_root(txq_root, handle);
if (q)
- goto unlock;
+ goto out;
}
q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
-
-unlock:
- spin_unlock_bh(&qdisc_list_lock);
-
+out:
return q;
}
@@ -462,7 +450,6 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
timer);
wd->qdisc->flags &= ~TCQ_F_THROTTLED;
- smp_wmb();
__netif_schedule(qdisc_root(wd->qdisc));
return HRTIMER_NORESTART;
@@ -892,9 +879,12 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
sch->stab = stab;
if (tca[TCA_RATE])
+ /* NB: ignores errors from replace_estimator
+ because change can't be undone. */
gen_replace_estimator(&sch->bstats, &sch->rate_est,
- qdisc_root_sleeping_lock(sch),
- tca[TCA_RATE]);
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+
return 0;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 43d37256c15e..2a8b83af7c47 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -62,7 +62,7 @@ struct atm_qdisc_data {
struct atm_flow_data link; /* unclassified skbs go here */
struct atm_flow_data *flows; /* NB: "link" is also on this
list */
- struct tasklet_struct task; /* requeue tasklet */
+ struct tasklet_struct task; /* dequeue tasklet */
};
/* ------------------------- Class/flow operations ------------------------- */
@@ -102,7 +102,8 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
return -EINVAL;
if (!new)
new = &noop_qdisc;
- *old = xchg(&flow->q, new);
+ *old = flow->q;
+ flow->q = new;
if (*old)
qdisc_reset(*old);
return 0;
@@ -480,11 +481,14 @@ static void sch_atm_dequeue(unsigned long data)
* If traffic is properly shaped, this won't generate nasty
* little bursts. Otherwise, it may ... (but that's okay)
*/
- while ((skb = flow->q->dequeue(flow->q))) {
- if (!atm_may_send(flow->vcc, skb->truesize)) {
- (void)flow->q->ops->requeue(skb, flow->q);
+ while ((skb = flow->q->ops->peek(flow->q))) {
+ if (!atm_may_send(flow->vcc, skb->truesize))
break;
- }
+
+ skb = qdisc_dequeue_peeked(flow->q);
+ if (unlikely(!skb))
+ break;
+
pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
/* remove any LL header somebody else has attached */
skb_pull(skb, skb_network_offset(skb));
@@ -516,27 +520,19 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
tasklet_schedule(&p->task);
- skb = p->link.q->dequeue(p->link.q);
+ skb = qdisc_dequeue_peeked(p->link.q);
if (skb)
sch->q.qlen--;
return skb;
}
-static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
+static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
- int ret;
- pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
- ret = p->link.q->ops->requeue(skb, p->link.q);
- if (!ret) {
- sch->q.qlen++;
- sch->qstats.requeues++;
- } else if (net_xmit_drop_count(ret)) {
- sch->qstats.drops++;
- p->link.qstats.drops++;
- }
- return ret;
+ pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
+
+ return p->link.q->ops->peek(p->link.q);
}
static unsigned int atm_tc_drop(struct Qdisc *sch)
@@ -694,7 +690,7 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct atm_qdisc_data),
.enqueue = atm_tc_enqueue,
.dequeue = atm_tc_dequeue,
- .requeue = atm_tc_requeue,
+ .peek = atm_tc_peek,
.drop = atm_tc_drop,
.init = atm_tc_init,
.reset = atm_tc_reset,
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index 507fb488bc98..094a874b48bc 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -33,6 +33,7 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
.priv_size = 0,
.enqueue = blackhole_enqueue,
.dequeue = blackhole_dequeue,
+ .peek = blackhole_dequeue,
.owner = THIS_MODULE,
};
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 03e389e8d945..9e43ed949167 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return ret;
}
-static int
-cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl;
- int ret;
-
- if ((cl = q->tx_class) == NULL) {
- kfree_skb(skb);
- sch->qstats.drops++;
- return NET_XMIT_CN;
- }
- q->tx_class = NULL;
-
- cbq_mark_toplevel(q, cl);
-
-#ifdef CONFIG_NET_CLS_ACT
- q->rx_class = cl;
- cl->q->__parent = sch;
-#endif
- if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
- sch->q.qlen++;
- sch->qstats.requeues++;
- if (!cl->next_alive)
- cbq_activate_class(cl);
- return 0;
- }
- if (net_xmit_drop_count(ret)) {
- sch->qstats.drops++;
- cl->qstats.drops++;
- }
- return ret;
-}
-
/* Overlimit actions */
/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
@@ -1669,7 +1635,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
#endif
}
sch_tree_lock(sch);
- *old = xchg(&cl->q, new);
+ *old = cl->q;
+ cl->q = new;
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch_tree_unlock(sch);
@@ -1798,11 +1765,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
}
if (tb[TCA_CBQ_RATE]) {
- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
+ rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
+ tb[TCA_CBQ_RTAB]);
if (rtab == NULL)
return -EINVAL;
}
+ if (tca[TCA_RATE]) {
+ err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ if (err) {
+ if (rtab)
+ qdisc_put_rtab(rtab);
+ return err;
+ }
+ }
+
/* Change class parameters */
sch_tree_lock(sch);
@@ -1810,8 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cbq_deactivate_class(cl);
if (rtab) {
- rtab = xchg(&cl->R_tab, rtab);
- qdisc_put_rtab(rtab);
+ qdisc_put_rtab(cl->R_tab);
+ cl->R_tab = rtab;
}
if (tb[TCA_CBQ_LSSOPT])
@@ -1838,10 +1817,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
sch_tree_unlock(sch);
- if (tca[TCA_RATE])
- gen_replace_estimator(&cl->bstats, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
- tca[TCA_RATE]);
return 0;
}
@@ -1888,6 +1863,17 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl = kzalloc(sizeof(*cl), GFP_KERNEL);
if (cl == NULL)
goto failure;
+
+ if (tca[TCA_RATE]) {
+ err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ if (err) {
+ kfree(cl);
+ goto failure;
+ }
+ }
+
cl->R_tab = rtab;
rtab = NULL;
cl->refcnt = 1;
@@ -1929,10 +1915,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
qdisc_class_hash_grow(sch, &q->clhash);
- if (tca[TCA_RATE])
- gen_new_estimator(&cl->bstats, &cl->rate_est,
- qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
-
*arg = (unsigned long)cl;
return 0;
@@ -2066,7 +2048,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct cbq_sched_data),
.enqueue = cbq_enqueue,
.dequeue = cbq_dequeue,
- .requeue = cbq_requeue,
+ .peek = qdisc_peek_dequeued,
.drop = cbq_drop,
.init = cbq_init,
.reset = cbq_reset,
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
new file mode 100644
index 000000000000..f6b4fa97df70
--- /dev/null
+++ b/net/sched/sch_drr.c
@@ -0,0 +1,519 @@
+/*
+ * net/sched/sch_drr.c Deficit Round Robin scheduler
+ *
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+
+struct drr_class { /* state kept for one DRR class */
+ struct Qdisc_class_common common; /* entry in drr_sched.clhash; common.classid is the class handle */
+ unsigned int refcnt; /* 1 at creation; ++ in drr_get_class(), -- in drr_put_class()/drr_delete_class() */
+ unsigned int filter_cnt; /* ++ in drr_bind_tcf(), -- in drr_unbind_tcf(); non-zero makes delete return -EBUSY */
+
+ struct gnet_stats_basic bstats; /* packets/bytes counted by drr_enqueue() */
+ struct gnet_stats_queue qstats; /* drops counted by drr_enqueue() */
+ struct gnet_stats_rate_est rate_est; /* handed to gen_replace_estimator()/gen_kill_estimator() */
+ struct list_head alist; /* link in drr_sched.active while the child qdisc holds packets */
+ struct Qdisc *qdisc; /* child qdisc that stores this class's packets */
+
+ u32 quantum; /* added to deficit whenever drr_dequeue() rotates the class to the list tail */
+ u32 deficit; /* allowance compared against qdisc_pkt_len() of the head packet in drr_dequeue() */
+};
+
+struct drr_sched { /* private data of the DRR qdisc (see .priv_size in drr_qdisc_ops) */
+ struct list_head active; /* classes with queued packets, served from the head by drr_dequeue() */
+ struct tcf_proto *filter_list; /* filter chain passed to tc_classify() in drr_classify() */
+ struct Qdisc_class_hash clhash; /* all classes, looked up by classid in drr_find_class() */
+};
+
+/*
+ * Look up a class by its classid in the qdisc's class hash.
+ *
+ * Returns the enclosing drr_class, or NULL when qdisc_class_find() finds
+ * no entry for @classid.
+ */
+static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+	struct Qdisc_class_common *entry;
+
+	entry = qdisc_class_find(&priv->clhash, classid);
+	return entry ? container_of(entry, struct drr_class, common) : NULL;
+}
+
+/*
+ * Reset the class's child qdisc and report the packets it held to
+ * qdisc_tree_decrease_qlen().  The queue length is sampled before the
+ * reset so the amount reported is what was queued beforehand.
+ */
+static void drr_purge_queue(struct drr_class *cl)
+{
+	struct Qdisc *child = cl->qdisc;
+	unsigned int queued = child->q.qlen;
+
+	qdisc_reset(child);
+	qdisc_tree_decrease_qlen(child, queued);
+}
+
+static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { /* validation policy given to nla_parse_nested() in drr_change_class() */
+ [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, /* read with nla_get_u32() */
+};
+
+/*
+ * Create a new class or update an existing one (Qdisc_class_ops.change).
+ *
+ * @sch:      the DRR qdisc
+ * @classid:  handle given to a newly created class and its default child
+ * @parentid: not used by this function
+ * @tca:      top-level attributes; TCA_OPTIONS must be present and carries
+ *            the nested TCA_DRR_* attributes, TCA_RATE optionally configures
+ *            the rate estimator
+ * @arg:      in: the existing class cast to unsigned long, or 0 to create
+ *            one; out: the new class when one was created
+ *
+ * Returns 0 on success, -EINVAL when TCA_OPTIONS is missing or the quantum
+ * is zero, -ENOBUFS when the class cannot be allocated, or the error
+ * reported by attribute parsing or estimator setup.
+ */
+static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+			    struct nlattr **tca, unsigned long *arg)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl = (struct drr_class *)*arg;
+	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct nlattr *tb[TCA_DRR_MAX + 1];
+	u32 quantum;
+	int err;
+
+	/*
+	 * A request may omit TCA_OPTIONS altogether.  nla_parse_nested()
+	 * must not be handed a NULL container attribute, so reject the
+	 * request here instead of oopsing.
+	 */
+	if (opt == NULL)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_DRR_QUANTUM]) {
+		quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
+		if (quantum == 0)
+			return -EINVAL;
+	} else
+		quantum = psched_mtu(qdisc_dev(sch));
+
+	if (cl != NULL) {
+		/* Existing class: swap the estimator first, it can fail. */
+		if (tca[TCA_RATE]) {
+			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+						    qdisc_root_sleeping_lock(sch),
+						    tca[TCA_RATE]);
+			if (err)
+				return err;
+		}
+
+		/* Only an explicitly supplied quantum overrides the old one. */
+		sch_tree_lock(sch);
+		if (tb[TCA_DRR_QUANTUM])
+			cl->quantum = quantum;
+		sch_tree_unlock(sch);
+
+		return 0;
+	}
+
+	cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL);
+	if (cl == NULL)
+		return -ENOBUFS;
+
+	cl->refcnt	   = 1;
+	cl->common.classid = classid;
+	cl->quantum	   = quantum;
+	cl->qdisc	   = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+					       &pfifo_qdisc_ops, classid);
+	/* Fall back to the noop qdisc when no pfifo child can be created. */
+	if (cl->qdisc == NULL)
+		cl->qdisc = &noop_qdisc;
+
+	if (tca[TCA_RATE]) {
+		err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+					    qdisc_root_sleeping_lock(sch),
+					    tca[TCA_RATE]);
+		if (err) {
+			/* The class is not yet visible: undo and bail out. */
+			qdisc_destroy(cl->qdisc);
+			kfree(cl);
+			return err;
+		}
+	}
+
+	sch_tree_lock(sch);
+	qdisc_class_hash_insert(&q->clhash, &cl->common);
+	sch_tree_unlock(sch);
+
+	qdisc_class_hash_grow(sch, &q->clhash);
+
+	*arg = (unsigned long)cl;
+	return 0;
+}
+
+/*
+ * Release everything owned by a class: its rate estimator, its child
+ * qdisc and finally the class structure itself.
+ */
+static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
+{
+	struct Qdisc *child = cl->qdisc;
+
+	gen_kill_estimator(&cl->bstats, &cl->rate_est);
+	qdisc_destroy(child);
+	kfree(cl);
+}
+
+/*
+ * Delete a class (Qdisc_class_ops.delete).
+ *
+ * Returns -EBUSY while filters are still bound to the class.  Otherwise
+ * the class's queue is purged and the class is removed from the hash
+ * under the tree lock; the class is destroyed here only if this drops
+ * the last reference.
+ */
+static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+	struct drr_sched *priv = qdisc_priv(sch);
+
+	if (cl->filter_cnt != 0)
+		return -EBUSY;
+
+	sch_tree_lock(sch);
+
+	drr_purge_queue(cl);
+	qdisc_class_hash_remove(&priv->clhash, &cl->common);
+
+	cl->refcnt--;
+	if (cl->refcnt == 0)
+		drr_destroy_class(sch, cl);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+/*
+ * Find a class by classid and take a reference on it
+ * (Qdisc_class_ops.get).  Returns the class as an unsigned long, or 0
+ * when no such class exists.
+ */
+static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
+{
+	struct drr_class *found = drr_find_class(sch, classid);
+
+	if (!found)
+		return 0;
+
+	found->refcnt++;
+	return (unsigned long)found;
+}
+
+/*
+ * Drop one reference on a class (Qdisc_class_ops.put) and destroy the
+ * class once the count reaches zero.
+ */
+static void drr_put_class(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	cl->refcnt--;
+	if (!cl->refcnt)
+		drr_destroy_class(sch, cl);
+}
+
+/*
+ * Return the filter chain anchor (Qdisc_class_ops.tcf_chain).  Only the
+ * qdisc itself has one: a non-zero class argument yields NULL.
+ */
+static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+
+	return cl ? NULL : &priv->filter_list;
+}
+
+/*
+ * Bind a filter to the class identified by @classid
+ * (Qdisc_class_ops.bind_tcf).  Bumps the class's filter count and returns
+ * the class as an unsigned long, or 0 when the class does not exist.
+ */
+static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
+				  u32 classid)
+{
+	struct drr_class *target = drr_find_class(sch, classid);
+
+	if (!target)
+		return 0;
+
+	target->filter_cnt++;
+	return (unsigned long)target;
+}
+
+/*
+ * Undo drr_bind_tcf(): drop one filter binding from the class
+ * (Qdisc_class_ops.unbind_tcf).
+ */
+static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_class *bound = (struct drr_class *)arg;
+
+	bound->filter_cnt -= 1;
+}
+
+/*
+ * Replace the child qdisc of a class (Qdisc_class_ops.graft).
+ *
+ * When @new is NULL a default pfifo is created, falling back to the noop
+ * qdisc if that fails.  The old child is purged and handed back through
+ * @old under the tree lock.  Always returns 0.
+ */
+static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
+			   struct Qdisc *new, struct Qdisc **old)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (!new)
+		new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+					&pfifo_qdisc_ops, cl->common.classid);
+	if (!new)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	drr_purge_queue(cl);
+	*old = cl->qdisc;
+	cl->qdisc = new;
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+/* Return the child qdisc attached to a class (Qdisc_class_ops.leaf). */
+static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return ((struct drr_class *)arg)->qdisc;
+}
+
+/*
+ * Queue-length change notification (Qdisc_class_ops.qlen_notify): take
+ * the class off the active list once its child qdisc is empty.
+ */
+static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (cl->qdisc->q.qlen != 0)
+		return;
+
+	list_del(&cl->alist);
+}
+
+/*
+ * Dump a class's configuration into a netlink message
+ * (Qdisc_class_ops.dump).
+ *
+ * Fills the tcmsg header and emits a nested TCA_OPTIONS attribute holding
+ * the quantum.  Returns the result of nla_nest_end() on success or
+ * -EMSGSIZE after cancelling the nest on failure.
+ */
+static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+	struct nlattr *opts;
+
+	tcm->tcm_info	= cl->qdisc->handle;
+	tcm->tcm_handle	= cl->common.classid;
+	tcm->tcm_parent	= TC_H_ROOT;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (!opts)
+		goto nla_put_failure;
+	/* NOTE(review): NLA_PUT_U32() presumably jumps to nla_put_failure
+	 * when the attribute does not fit - confirm against netlink.h. */
+	NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum);
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
+}
+
+/*
+ * Dump a class's statistics (Qdisc_class_ops.dump_stats).
+ *
+ * Copies the basic and rate-estimator stats of the class and the queue
+ * stats of its child qdisc, then the DRR-specific xstats.  The deficit is
+ * reported only while the child holds packets; otherwise it is zero.
+ * Returns -1 as soon as one of the copies fails, else the result of
+ * gnet_stats_copy_app().
+ */
+static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+				struct gnet_dump *d)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+	struct tc_drr_stats xstats;
+
+	memset(&xstats, 0, sizeof(xstats));
+	if (cl->qdisc->q.qlen)
+		xstats.deficit = cl->deficit;
+
+	if (gnet_stats_copy_basic(d, &cl->bstats) < 0)
+		return -1;
+	if (gnet_stats_copy_rate_est(d, &cl->rate_est) < 0)
+		return -1;
+	if (gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
+		return -1;
+
+	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+}
+
+/*
+ * Iterate over all classes (Qdisc_class_ops.walk).
+ *
+ * The first arg->skip classes are only counted.  For the rest arg->fn is
+ * invoked; a negative return sets arg->stop and ends the walk without
+ * counting that class.
+ */
+static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+	struct hlist_node *pos;
+	struct drr_class *cl;
+	unsigned int bucket;
+
+	if (arg->stop)
+		return;
+
+	for (bucket = 0; bucket < priv->clhash.hashsize; bucket++) {
+		hlist_for_each_entry(cl, pos, &priv->clhash.hash[bucket],
+				     common.hnode) {
+			if (arg->count >= arg->skip &&
+			    arg->fn(sch, (unsigned long)cl, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+			arg->count++;
+		}
+	}
+}
+
+/*
+ * Pick the class a packet belongs to.
+ *
+ * skb->priority is tried first when its major number matches the qdisc
+ * handle.  Otherwise the filter chain decides.  *qerr is written only on
+ * the filter path: it is preset to SUCCESS|BYPASS and changed to
+ * SUCCESS|STOLEN when an action queued or stole the packet.
+ *
+ * Returns the class, or NULL when the packet cannot be classified or an
+ * action asked for it to be dropped/stolen.
+ */
+static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
+				      int *qerr)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+	struct drr_class *direct;
+	struct tcf_result res;
+	int verdict;
+
+	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
+		direct = drr_find_class(sch, skb->priority);
+		if (direct != NULL)
+			return direct;
+	}
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	verdict = tc_classify(skb, priv->filter_list, &res);
+	if (verdict < 0)
+		return NULL;
+
+#ifdef CONFIG_NET_CLS_ACT
+	if (verdict == TC_ACT_QUEUED || verdict == TC_ACT_STOLEN) {
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		return NULL;
+	}
+	if (verdict == TC_ACT_SHOT)
+		return NULL;
+#endif
+	/* Prefer the class cached in the result; else resolve the classid. */
+	if (res.class)
+		return (struct drr_class *)res.class;
+	return drr_find_class(sch, res.classid);
+}
+
+/*
+ * Enqueue a packet (Qdisc_ops.enqueue).
+ *
+ * Unclassifiable packets are freed, and counted as a drop when the
+ * classifier result carries __NET_XMIT_BYPASS.  Otherwise the packet goes
+ * to the class's child qdisc.  A class whose child now holds exactly one
+ * packet is appended to the active list with a fresh deficit of one
+ * quantum.  Returns the classifier/child result code.
+ */
+static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+	struct drr_class *cl;
+	unsigned int pktlen;
+	int ret;
+
+	cl = drr_classify(skb, sch, &ret);
+	if (!cl) {
+		if (ret & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+
+	/* Sample the length before the child takes ownership of the skb. */
+	pktlen = qdisc_pkt_len(skb);
+	ret = qdisc_enqueue(skb, cl->qdisc);
+	if (unlikely(ret != NET_XMIT_SUCCESS)) {
+		if (net_xmit_drop_count(ret)) {
+			cl->qstats.drops++;
+			sch->qstats.drops++;
+		}
+		return ret;
+	}
+
+	if (cl->qdisc->q.qlen == 1) {
+		list_add_tail(&cl->alist, &priv->active);
+		cl->deficit = cl->quantum;
+	}
+
+	cl->bstats.bytes += pktlen;
+	cl->bstats.packets++;
+	sch->bstats.bytes += pktlen;
+	sch->bstats.packets++;
+
+	sch->q.qlen++;
+	return ret;
+}
+
+/*
+ * Dequeue the next packet (Qdisc_ops.dequeue).
+ *
+ * The class at the head of the active list is served while its deficit
+ * covers the head packet's length.  A class that cannot afford its head
+ * packet gets one more quantum and is moved to the tail.  Returns NULL
+ * when no class is active or the head class's child has nothing to peek.
+ */
+static struct sk_buff *drr_dequeue(struct Qdisc *sch)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+
+	if (list_empty(&priv->active))
+		return NULL;
+
+	for (;;) {
+		struct drr_class *head;
+		struct sk_buff *skb;
+		unsigned int pktlen;
+
+		head = list_first_entry(&priv->active, struct drr_class, alist);
+		skb = head->qdisc->ops->peek(head->qdisc);
+		if (!skb)
+			return NULL;
+
+		pktlen = qdisc_pkt_len(skb);
+		if (pktlen > head->deficit) {
+			/* Not enough credit: top up and try the next class. */
+			head->deficit += head->quantum;
+			list_move_tail(&head->alist, &priv->active);
+			continue;
+		}
+
+		head->deficit -= pktlen;
+		skb = qdisc_dequeue_peeked(head->qdisc);
+		if (head->qdisc->q.qlen == 0)
+			list_del(&head->alist);
+		sch->q.qlen--;
+		return skb;
+	}
+}
+
+/*
+ * Drop one packet (Qdisc_ops.drop).
+ *
+ * Asks each active class's child qdisc in list order to drop a packet and
+ * stops at the first one that does.  A class emptied by the drop leaves
+ * the active list.  Returns the child's drop result, or 0 when nothing
+ * could be dropped.
+ */
+static unsigned int drr_drop(struct Qdisc *sch)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+	struct drr_class *cl;
+	unsigned int dropped;
+
+	list_for_each_entry(cl, &priv->active, alist) {
+		if (!cl->qdisc->ops->drop)
+			continue;
+
+		dropped = cl->qdisc->ops->drop(cl->qdisc);
+		if (dropped == 0)
+			continue;
+
+		sch->q.qlen--;
+		if (cl->qdisc->q.qlen == 0)
+			list_del(&cl->alist);
+		return dropped;
+	}
+	return 0;
+}
+
+/*
+ * Initialise a DRR qdisc (Qdisc_ops.init): set up the class hash and the
+ * empty active list.  Returns the negative error from
+ * qdisc_class_hash_init(), or 0 on success.  @opt is not consulted.
+ */
+static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+	int err = qdisc_class_hash_init(&priv->clhash);
+
+	if (err < 0)
+		return err;
+
+	INIT_LIST_HEAD(&priv->active);
+	return 0;
+}
+
+/*
+ * Reset the qdisc (Qdisc_ops.reset): unlink every class that currently
+ * holds packets from the active list, reset each class's child qdisc and
+ * zero the qdisc's own queue length.
+ */
+static void drr_reset_qdisc(struct Qdisc *sch)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+	struct hlist_node *pos;
+	struct drr_class *cl;
+	unsigned int bucket;
+
+	for (bucket = 0; bucket < priv->clhash.hashsize; bucket++) {
+		hlist_for_each_entry(cl, pos, &priv->clhash.hash[bucket],
+				     common.hnode) {
+			if (cl->qdisc->q.qlen != 0)
+				list_del(&cl->alist);
+			qdisc_reset(cl->qdisc);
+		}
+	}
+	sch->q.qlen = 0;
+}
+
+/*
+ * Destroy the qdisc (Qdisc_ops.destroy): tear down the filter chain,
+ * destroy every class and release the class hash.  The _safe iterator is
+ * needed because drr_destroy_class() frees the entry being visited.
+ */
+static void drr_destroy_qdisc(struct Qdisc *sch)
+{
+	struct drr_sched *priv = qdisc_priv(sch);
+	struct hlist_node *pos, *tmp;
+	struct drr_class *cl;
+	unsigned int bucket;
+
+	tcf_destroy_chain(&priv->filter_list);
+
+	for (bucket = 0; bucket < priv->clhash.hashsize; bucket++) {
+		hlist_for_each_entry_safe(cl, pos, tmp,
+					  &priv->clhash.hash[bucket],
+					  common.hnode) {
+			drr_destroy_class(sch, cl);
+		}
+	}
+	qdisc_class_hash_destroy(&priv->clhash);
+}
+
+static const struct Qdisc_class_ops drr_class_ops = { /* class operations, referenced by drr_qdisc_ops.cl_ops */
+ .change = drr_change_class, /* create a class or change its quantum/estimator */
+ .delete = drr_delete_class, /* refuses with -EBUSY while filters are bound */
+ .get = drr_get_class, /* look up by classid and take a reference */
+ .put = drr_put_class, /* drop a reference, destroying on the last one */
+ .tcf_chain = drr_tcf_chain, /* filter chain exists only at qdisc level */
+ .bind_tcf = drr_bind_tcf, /* increments filter_cnt */
+ .unbind_tcf = drr_unbind_tcf, /* decrements filter_cnt */
+ .graft = drr_graft_class, /* swap the child qdisc of a class */
+ .leaf = drr_class_leaf, /* return the child qdisc of a class */
+ .qlen_notify = drr_qlen_notify, /* unlink the class from the active list when empty */
+ .dump = drr_dump_class, /* emit handle and TCA_DRR_QUANTUM */
+ .dump_stats = drr_dump_class_stats, /* emit basic, rate, queue stats and xstats */
+ .walk = drr_walk, /* iterate over all classes in the hash */
+};
+
+static struct Qdisc_ops drr_qdisc_ops __read_mostly = { /* registered by drr_init(), unregistered by drr_exit() */
+ .cl_ops = &drr_class_ops, /* per-class operations */
+ .id = "drr", /* qdisc kind name */
+ .priv_size = sizeof(struct drr_sched), /* private area returned by qdisc_priv() */
+ .enqueue = drr_enqueue,
+ .dequeue = drr_dequeue,
+ .peek = qdisc_peek_dequeued, /* generic helper, not defined in this file */
+ .drop = drr_drop,
+ .init = drr_init_qdisc,
+ .reset = drr_reset_qdisc,
+ .destroy = drr_destroy_qdisc,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: register the DRR qdisc and pass on the result. */
+static int __init drr_init(void)
+{
+	int err = register_qdisc(&drr_qdisc_ops);
+
+	return err;
+}
+
+/* Module exit point: unregister the DRR qdisc. */
+static void __exit drr_exit(void)
+{
+	struct Qdisc_ops *ops = &drr_qdisc_ops;
+
+	unregister_qdisc(ops);
+}
+
+module_init(drr_init); /* drr_init() registers drr_qdisc_ops */
+module_exit(drr_exit); /* drr_exit() unregisters drr_qdisc_ops */
+MODULE_LICENSE("GPL"); /* matches the GPL v2 notice in the file header */
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index ba43aab3a851..d303daa45d49 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -68,7 +68,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
}
sch_tree_lock(sch);
- *old = xchg(&p->q, new);
+ *old = p->q;
+ p->q = new;
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch_tree_unlock(sch);
@@ -313,24 +314,13 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
return skb;
}
-static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
+static struct sk_buff *dsmark_peek(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- int err;
-
- pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
-
- err = p->q->ops->requeue(skb, p->q);
- if (err != NET_XMIT_SUCCESS) {
- if (net_xmit_drop_count(err))
- sch->qstats.drops++;
- return err;
- }
- sch->q.qlen++;
- sch->qstats.requeues++;
+ pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p);
- return NET_XMIT_SUCCESS;
+ return p->q->ops->peek(p->q);
}
static unsigned int dsmark_drop(struct Qdisc *sch)
@@ -496,7 +486,7 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct dsmark_qdisc_data),
.enqueue = dsmark_enqueue,
.dequeue = dsmark_dequeue,
- .requeue = dsmark_requeue,
+ .peek = dsmark_peek,
.drop = dsmark_drop,
.init = dsmark_init,
.reset = dsmark_reset,
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 23d258bfe8ac..92cfc9d7e3b9 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -83,7 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct fifo_sched_data),
.enqueue = pfifo_enqueue,
.dequeue = qdisc_dequeue_head,
- .requeue = qdisc_requeue,
+ .peek = qdisc_peek_head,
.drop = qdisc_queue_drop,
.init = fifo_init,
.reset = qdisc_reset_queue,
@@ -98,7 +98,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct fifo_sched_data),
.enqueue = bfifo_enqueue,
.dequeue = qdisc_dequeue_head,
- .requeue = qdisc_requeue,
+ .peek = qdisc_peek_head,
.drop = qdisc_queue_drop,
.init = fifo_init,
.reset = qdisc_reset_queue,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cdcd16fcfeda..5f5efe4e6072 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg)
char drivername[64];
WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
dev->name, netdev_drivername(dev, drivername, 64));
- dev->tx_timeout(dev);
+ dev->netdev_ops->ndo_tx_timeout(dev);
}
if (!mod_timer(&dev->watchdog_timer,
round_jiffies(jiffies +
@@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg)
void __netdev_watchdog_up(struct net_device *dev)
{
- if (dev->tx_timeout) {
+ if (dev->netdev_ops->ndo_tx_timeout) {
if (dev->watchdog_timeo <= 0)
dev->watchdog_timeo = 5*HZ;
if (!mod_timer(&dev->watchdog_timer,
@@ -311,21 +311,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
return NULL;
}
-static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
-{
- if (net_ratelimit())
- printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
- skb->dev->name);
- kfree_skb(skb);
- return NET_XMIT_CN;
-}
-
struct Qdisc_ops noop_qdisc_ops __read_mostly = {
.id = "noop",
.priv_size = 0,
.enqueue = noop_enqueue,
.dequeue = noop_dequeue,
- .requeue = noop_requeue,
+ .peek = noop_dequeue,
.owner = THIS_MODULE,
};
@@ -340,7 +331,6 @@ struct Qdisc noop_qdisc = {
.flags = TCQ_F_BUILTIN,
.ops = &noop_qdisc_ops,
.list = LIST_HEAD_INIT(noop_qdisc.list),
- .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
};
@@ -351,7 +341,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
.priv_size = 0,
.enqueue = noop_enqueue,
.dequeue = noop_dequeue,
- .requeue = noop_requeue,
+ .peek = noop_dequeue,
.owner = THIS_MODULE,
};
@@ -367,7 +357,6 @@ static struct Qdisc noqueue_qdisc = {
.flags = TCQ_F_BUILTIN,
.ops = &noqueue_qdisc_ops,
.list = LIST_HEAD_INIT(noqueue_qdisc.list),
- .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
.q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
.dev_queue = &noqueue_netdev_queue,
};
@@ -416,10 +405,17 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
return NULL;
}
-static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
{
- qdisc->q.qlen++;
- return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
+ int prio;
+ struct sk_buff_head *list = qdisc_priv(qdisc);
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+ if (!skb_queue_empty(list + prio))
+ return skb_peek(list + prio);
+ }
+
+ return NULL;
}
static void pfifo_fast_reset(struct Qdisc* qdisc)
@@ -462,7 +458,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
.enqueue = pfifo_fast_enqueue,
.dequeue = pfifo_fast_dequeue,
- .requeue = pfifo_fast_requeue,
+ .peek = pfifo_fast_peek,
.init = pfifo_fast_init,
.reset = pfifo_fast_reset,
.dump = pfifo_fast_dump,
@@ -488,7 +484,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch->padded = (char *) sch - (char *) p;
INIT_LIST_HEAD(&sch->list);
- skb_queue_head_init(&sch->requeue);
skb_queue_head_init(&sch->q);
sch->ops = ops;
sch->enqueue = ops->enqueue;
@@ -531,6 +526,9 @@ void qdisc_reset(struct Qdisc *qdisc)
if (ops->reset)
ops->reset(qdisc);
+
+ kfree_skb(qdisc->gso_skb);
+ qdisc->gso_skb = NULL;
}
EXPORT_SYMBOL(qdisc_reset);
@@ -557,8 +555,6 @@ void qdisc_destroy(struct Qdisc *qdisc)
dev_put(qdisc_dev(qdisc));
kfree_skb(qdisc->gso_skb);
- __skb_queue_purge(&qdisc->requeue);
-
kfree((char *) qdisc - qdisc->padded);
}
EXPORT_SYMBOL(qdisc_destroy);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index c1ad6b8de105..40408d595c08 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -240,26 +240,6 @@ congestion_drop:
return NET_XMIT_CN;
}
-static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
- struct gred_sched *t = qdisc_priv(sch);
- struct gred_sched_data *q;
- u16 dp = tc_index_to_dp(skb);
-
- if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
- if (net_ratelimit())
- printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
- "for requeue, screwing up backlog.\n",
- tc_index_to_dp(skb));
- } else {
- if (red_is_idling(&q->parms))
- red_end_of_idle_period(&q->parms);
- q->backlog += qdisc_pkt_len(skb);
- }
-
- return qdisc_requeue(skb, sch);
-}
-
static struct sk_buff *gred_dequeue(struct Qdisc* sch)
{
struct sk_buff *skb;
@@ -602,7 +582,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct gred_sched),
.enqueue = gred_enqueue,
.dequeue = gred_dequeue,
- .requeue = gred_requeue,
+ .peek = qdisc_peek_head,
.drop = gred_drop,
.init = gred_init,
.reset = gred_reset,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c1e77da8cd09..45c31b1a4e1d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -184,7 +184,6 @@ struct hfsc_sched
struct rb_root eligible; /* eligible tree */
struct list_head droplist; /* active leaf class list (for
dropping) */
- struct sk_buff_head requeue; /* requeued packet */
struct qdisc_watchdog watchdog; /* watchdog timer */
};
@@ -880,28 +879,20 @@ set_passive(struct hfsc_class *cl)
*/
}
-/*
- * hack to get length of first packet in queue.
- */
static unsigned int
qdisc_peek_len(struct Qdisc *sch)
{
struct sk_buff *skb;
unsigned int len;
- skb = sch->dequeue(sch);
+ skb = sch->ops->peek(sch);
if (skb == NULL) {
if (net_ratelimit())
printk("qdisc_peek_len: non work-conserving qdisc ?\n");
return 0;
}
len = qdisc_pkt_len(skb);
- if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
- if (net_ratelimit())
- printk("qdisc_peek_len: failed to requeue\n");
- qdisc_tree_decrease_qlen(sch, 1);
- return 0;
- }
+
return len;
}
@@ -1027,6 +1018,14 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
}
cur_time = psched_get_time();
+ if (tca[TCA_RATE]) {
+ err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ if (err)
+ return err;
+ }
+
sch_tree_lock(sch);
if (rsc != NULL)
hfsc_change_rsc(cl, rsc, cur_time);
@@ -1043,10 +1042,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
}
sch_tree_unlock(sch);
- if (tca[TCA_RATE])
- gen_replace_estimator(&cl->bstats, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
- tca[TCA_RATE]);
return 0;
}
@@ -1072,6 +1067,16 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ if (tca[TCA_RATE]) {
+ err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ if (err) {
+ kfree(cl);
+ return err;
+ }
+ }
+
if (rsc != NULL)
hfsc_change_rsc(cl, rsc, 0);
if (fsc != NULL)
@@ -1102,9 +1107,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
qdisc_class_hash_grow(sch, &q->clhash);
- if (tca[TCA_RATE])
- gen_new_estimator(&cl->bstats, &cl->rate_est,
- qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
*arg = (unsigned long)cl;
return 0;
}
@@ -1211,7 +1213,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
sch_tree_lock(sch);
hfsc_purge_queue(sch, cl);
- *old = xchg(&cl->qdisc, new);
+ *old = cl->qdisc;
+ cl->qdisc = new;
sch_tree_unlock(sch);
return 0;
}
@@ -1440,7 +1443,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
return err;
q->eligible = RB_ROOT;
INIT_LIST_HEAD(&q->droplist);
- skb_queue_head_init(&q->requeue);
q->root.cl_common.classid = sch->handle;
q->root.refcnt = 1;
@@ -1525,7 +1527,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
hfsc_reset_class(cl);
}
- __skb_queue_purge(&q->requeue);
q->eligible = RB_ROOT;
INIT_LIST_HEAD(&q->droplist);
qdisc_watchdog_cancel(&q->watchdog);
@@ -1550,7 +1551,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
hfsc_destroy_class(sch, cl);
}
qdisc_class_hash_destroy(&q->clhash);
- __skb_queue_purge(&q->requeue);
qdisc_watchdog_cancel(&q->watchdog);
}
@@ -1574,7 +1574,7 @@ static int
hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct hfsc_class *cl;
- int err;
+ int uninitialized_var(err);
cl = hfsc_classify(skb, sch, &err);
if (cl == NULL) {
@@ -1617,8 +1617,6 @@ hfsc_dequeue(struct Qdisc *sch)
if (sch->q.qlen == 0)
return NULL;
- if ((skb = __skb_dequeue(&q->requeue)))
- goto out;
cur_time = psched_get_time();
@@ -1642,7 +1640,7 @@ hfsc_dequeue(struct Qdisc *sch)
}
}
- skb = cl->qdisc->dequeue(cl->qdisc);
+ skb = qdisc_dequeue_peeked(cl->qdisc);
if (skb == NULL) {
if (net_ratelimit())
printk("HFSC: Non-work-conserving qdisc ?\n");
@@ -1667,24 +1665,12 @@ hfsc_dequeue(struct Qdisc *sch)
set_passive(cl);
}
- out:
sch->flags &= ~TCQ_F_THROTTLED;
sch->q.qlen--;
return skb;
}
-static int
-hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
- struct hfsc_sched *q = qdisc_priv(sch);
-
- __skb_queue_head(&q->requeue, skb);
- sch->q.qlen++;
- sch->qstats.requeues++;
- return NET_XMIT_SUCCESS;
-}
-
static unsigned int
hfsc_drop(struct Qdisc *sch)
{
@@ -1735,7 +1721,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
.dump = hfsc_dump_qdisc,
.enqueue = hfsc_enqueue,
.dequeue = hfsc_dequeue,
- .requeue = hfsc_requeue,
+ .peek = qdisc_peek_dequeued,
.drop = hfsc_drop,
.cl_ops = &hfsc_class_ops,
.priv_size = sizeof(struct hfsc_sched),
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d14f02056ae6..5070643ce534 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -84,12 +84,12 @@ struct htb_class {
unsigned int children;
struct htb_class *parent; /* parent class */
+ int prio; /* these two are used only by leaves... */
+ int quantum; /* but stored for parent-to-leaf return */
+
union {
struct htb_class_leaf {
struct Qdisc *q;
- int prio;
- int aprio;
- int quantum;
int deficit[TC_HTB_MAXDEPTH];
struct list_head drop_list;
} leaf;
@@ -123,19 +123,8 @@ struct htb_class {
psched_tdiff_t mbuffer; /* max wait time */
long tokens, ctokens; /* current number of tokens */
psched_time_t t_c; /* checkpoint time */
-
- int prio; /* For parent to leaf return possible here */
- int quantum; /* we do backup. Finally full replacement */
- /* of un.leaf originals should be done. */
};
-static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
- int size)
-{
- long result = qdisc_l2t(rate, size);
- return result;
-}
-
struct htb_sched {
struct Qdisc_class_hash clhash;
struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
@@ -152,9 +141,6 @@ struct htb_sched {
/* time of nearest event per level (row) */
psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
- /* whether we hit non-work conserving class during this dequeue; we use */
- int nwc_hit; /* this to disable mindelay complaint in dequeue */
-
int defcls; /* class where unclassified flows go to */
/* filters for qdisc itself */
@@ -527,10 +513,10 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
if (!cl->prio_activity) {
- cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
+ cl->prio_activity = 1 << cl->prio;
htb_activate_prios(q, cl);
list_add_tail(&cl->un.leaf.drop_list,
- q->drops + cl->un.leaf.aprio);
+ q->drops + cl->prio);
}
}
@@ -551,7 +537,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
- int ret;
+ int uninitialized_var(ret);
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = htb_classify(skb, sch, &ret);
@@ -591,45 +577,30 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
-/* TODO: requeuing packet charges it to policers again !! */
-static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
+static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
{
- int ret;
- struct htb_sched *q = qdisc_priv(sch);
- struct htb_class *cl = htb_classify(skb, sch, &ret);
- struct sk_buff *tskb;
+ long toks = diff + cl->tokens;
- if (cl == HTB_DIRECT) {
- /* enqueue to helper queue */
- if (q->direct_queue.qlen < q->direct_qlen) {
- __skb_queue_head(&q->direct_queue, skb);
- } else {
- __skb_queue_head(&q->direct_queue, skb);
- tskb = __skb_dequeue_tail(&q->direct_queue);
- kfree_skb(tskb);
- sch->qstats.drops++;
- return NET_XMIT_CN;
- }
-#ifdef CONFIG_NET_CLS_ACT
- } else if (!cl) {
- if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
- kfree_skb(skb);
- return ret;
-#endif
- } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
- NET_XMIT_SUCCESS) {
- if (net_xmit_drop_count(ret)) {
- sch->qstats.drops++;
- cl->qstats.drops++;
- }
- return ret;
- } else
- htb_activate(q, cl);
+ if (toks > cl->buffer)
+ toks = cl->buffer;
+ toks -= (long) qdisc_l2t(cl->rate, bytes);
+ if (toks <= -cl->mbuffer)
+ toks = 1 - cl->mbuffer;
- sch->q.qlen++;
- sch->qstats.requeues++;
- return NET_XMIT_SUCCESS;
+ cl->tokens = toks;
+}
+
+static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
+{
+ long toks = diff + cl->ctokens;
+
+ if (toks > cl->cbuffer)
+ toks = cl->cbuffer;
+ toks -= (long) qdisc_l2t(cl->ceil, bytes);
+ if (toks <= -cl->mbuffer)
+ toks = 1 - cl->mbuffer;
+
+ cl->ctokens = toks;
}
/**
@@ -647,26 +618,20 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
int level, struct sk_buff *skb)
{
int bytes = qdisc_pkt_len(skb);
- long toks, diff;
enum htb_cmode old_mode;
-
-#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
- if (toks > cl->B) toks = cl->B; \
- toks -= L2T(cl, cl->R, bytes); \
- if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
- cl->T = toks
+ long diff;
while (cl) {
diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
if (cl->level >= level) {
if (cl->level == level)
cl->xstats.lends++;
- HTB_ACCNT(tokens, buffer, rate);
+ htb_accnt_tokens(cl, bytes, diff);
} else {
cl->xstats.borrows++;
cl->tokens += diff; /* we moved t_c; update tokens */
}
- HTB_ACCNT(ctokens, cbuffer, ceil);
+ htb_accnt_ctokens(cl, bytes, diff);
cl->t_c = q->now;
old_mode = cl->cmode;
@@ -733,14 +698,14 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
while (n) {
struct htb_class *cl =
rb_entry(n, struct htb_class, node[prio]);
- if (id == cl->common.classid)
- return n;
if (id > cl->common.classid) {
n = n->rb_right;
- } else {
+ } else if (id < cl->common.classid) {
r = n;
n = n->rb_left;
+ } else {
+ return n;
}
}
return r;
@@ -761,7 +726,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
u32 *pid;
} stk[TC_HTB_MAXDEPTH], *sp = stk;
- WARN_ON(!tree->rb_node);
+ BUG_ON(!tree->rb_node);
sp->root = tree->rb_node;
sp->pptr = pptr;
sp->pid = pid;
@@ -781,9 +746,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
*sp->pptr = (*sp->pptr)->rb_left;
if (sp > stk) {
sp--;
- WARN_ON(!*sp->pptr);
- if (!*sp->pptr)
+ if (!*sp->pptr) {
+ WARN_ON(1);
return NULL;
+ }
htb_next_rb_node(sp->pptr);
}
} else {
@@ -814,8 +780,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
do {
next:
- WARN_ON(!cl);
- if (!cl)
+ if (unlikely(!cl))
return NULL;
/* class can be empty - it is unlikely but can be true if leaf
@@ -849,7 +814,7 @@ next:
cl->common.classid);
cl->warned = 1;
}
- q->nwc_hit++;
+
htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
ptr[0]) + prio);
cl = htb_lookup_leaf(q->row[level] + prio, prio,
@@ -861,7 +826,7 @@ next:
if (likely(skb != NULL)) {
cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
if (cl->un.leaf.deficit[level] < 0) {
- cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
+ cl->un.leaf.deficit[level] += cl->quantum;
htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
ptr[0]) + prio);
}
@@ -894,7 +859,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
q->now = psched_get_time();
next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
- q->nwc_hit = 0;
+
for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
/* common case optimization - skip event handler quickly */
int m;
@@ -1095,8 +1060,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
opt.buffer = cl->buffer;
opt.ceil = cl->ceil->rate;
opt.cbuffer = cl->cbuffer;
- opt.quantum = cl->un.leaf.quantum;
- opt.prio = cl->un.leaf.prio;
+ opt.quantum = cl->quantum;
+ opt.prio = cl->prio;
opt.level = cl->level;
NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
@@ -1141,7 +1106,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
== NULL)
return -ENOBUFS;
sch_tree_lock(sch);
- if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
+ *old = cl->un.leaf.q;
+ cl->un.leaf.q = new;
+ if (*old != NULL) {
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
}
@@ -1198,8 +1165,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
memset(&parent->un.inner, 0, sizeof(parent->un.inner));
INIT_LIST_HEAD(&parent->un.leaf.drop_list);
parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
- parent->un.leaf.quantum = parent->quantum;
- parent->un.leaf.prio = parent->prio;
parent->tokens = parent->buffer;
parent->ctokens = parent->cbuffer;
parent->t_c = psched_get_time();
@@ -1371,9 +1336,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
goto failure;
- gen_new_estimator(&cl->bstats, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
- tca[TCA_RATE] ? : &est.nla);
+ err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE] ? : &est.nla);
+ if (err) {
+ kfree(cl);
+ goto failure;
+ }
+
cl->refcnt = 1;
cl->children = 0;
INIT_LIST_HEAD(&cl->un.leaf.drop_list);
@@ -1425,37 +1395,36 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (parent)
parent->children++;
} else {
- if (tca[TCA_RATE])
- gen_replace_estimator(&cl->bstats, &cl->rate_est,
- qdisc_root_sleeping_lock(sch),
- tca[TCA_RATE]);
+ if (tca[TCA_RATE]) {
+ err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ if (err)
+ return err;
+ }
sch_tree_lock(sch);
}
/* it used to be a nasty bug here, we have to check that node
is really leaf before changing cl->un.leaf ! */
if (!cl->level) {
- cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
- if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
+ cl->quantum = rtab->rate.rate / q->rate2quantum;
+ if (!hopt->quantum && cl->quantum < 1000) {
printk(KERN_WARNING
"HTB: quantum of class %X is small. Consider r2q change.\n",
cl->common.classid);
- cl->un.leaf.quantum = 1000;
+ cl->quantum = 1000;
}
- if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
+ if (!hopt->quantum && cl->quantum > 200000) {
printk(KERN_WARNING
"HTB: quantum of class %X is big. Consider r2q change.\n",
cl->common.classid);
- cl->un.leaf.quantum = 200000;
+ cl->quantum = 200000;
}
if (hopt->quantum)
- cl->un.leaf.quantum = hopt->quantum;
- if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
- cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
-
- /* backup for htb_parent_to_leaf */
- cl->quantum = cl->un.leaf.quantum;
- cl->prio = cl->un.leaf.prio;
+ cl->quantum = hopt->quantum;
+ if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
+ cl->prio = TC_HTB_NUMPRIO - 1;
}
cl->buffer = hopt->buffer;
@@ -1565,7 +1534,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct htb_sched),
.enqueue = htb_enqueue,
.dequeue = htb_dequeue,
- .requeue = htb_requeue,
+ .peek = qdisc_peek_dequeued,
.drop = htb_drop,
.init = htb_init,
.reset = htb_reset,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 915f3149dde2..7e151861794b 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return ret;
}
-
-static int
-multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
- struct Qdisc *qdisc;
- struct multiq_sched_data *q = qdisc_priv(sch);
- int ret;
-
- qdisc = multiq_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
- if (qdisc == NULL) {
- if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
- kfree_skb(skb);
- return ret;
- }
-#endif
-
- ret = qdisc->ops->requeue(skb, qdisc);
- if (ret == NET_XMIT_SUCCESS) {
- sch->q.qlen++;
- sch->qstats.requeues++;
- if (q->curband)
- q->curband--;
- else
- q->curband = q->bands - 1;
- return NET_XMIT_SUCCESS;
- }
- if (net_xmit_drop_count(ret))
- sch->qstats.drops++;
- return ret;
-}
-
-
static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
{
struct multiq_sched_data *q = qdisc_priv(sch);
@@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
q->curband = 0;
/* Check that target subqueue is available before
- * pulling an skb to avoid excessive requeues
+ * pulling an skb to avoid head-of-line blocking.
*/
if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
qdisc = q->queues[q->curband];
@@ -155,6 +121,34 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
}
+static struct sk_buff *multiq_peek(struct Qdisc *sch)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ unsigned int curband = q->curband;
+ struct Qdisc *qdisc;
+ struct sk_buff *skb;
+ int band;
+
+ for (band = 0; band < q->bands; band++) {
+ /* cycle through bands to ensure fairness */
+ curband++;
+ if (curband >= q->bands)
+ curband = 0;
+
+ /* Check that target subqueue is available before
+ * pulling an skb to avoid head-of-line blocking.
+ */
+ if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+ qdisc = q->queues[curband];
+ skb = qdisc->ops->peek(qdisc);
+ if (skb)
+ return skb;
+ }
+ }
+ return NULL;
+
+}
+
static unsigned int multiq_drop(struct Qdisc *sch)
{
struct multiq_sched_data *q = qdisc_priv(sch);
@@ -220,7 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
q->bands = qopt->bands;
for (i = q->bands; i < q->max_bands; i++) {
if (q->queues[i] != &noop_qdisc) {
- struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+ struct Qdisc *child = q->queues[i];
+ q->queues[i] = &noop_qdisc;
qdisc_tree_decrease_qlen(child, child->q.qlen);
qdisc_destroy(child);
}
@@ -230,7 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
for (i = 0; i < q->bands; i++) {
if (q->queues[i] == &noop_qdisc) {
- struct Qdisc *child;
+ struct Qdisc *child, *old;
child = qdisc_create_dflt(qdisc_dev(sch),
sch->dev_queue,
&pfifo_qdisc_ops,
@@ -238,12 +233,13 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
i + 1));
if (child) {
sch_tree_lock(sch);
- child = xchg(&q->queues[i], child);
+ old = q->queues[i];
+ q->queues[i] = child;
- if (child != &noop_qdisc) {
- qdisc_tree_decrease_qlen(child,
- child->q.qlen);
- qdisc_destroy(child);
+ if (old != &noop_qdisc) {
+ qdisc_tree_decrease_qlen(old,
+ old->q.qlen);
+ qdisc_destroy(old);
}
sch_tree_unlock(sch);
}
@@ -451,7 +447,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct multiq_sched_data),
.enqueue = multiq_enqueue,
.dequeue = multiq_dequeue,
- .requeue = multiq_requeue,
+ .peek = multiq_peek,
.drop = multiq_drop,
.init = multiq_init,
.reset = multiq_reset,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 98402f0efa47..d876b8734848 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -230,7 +230,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
*/
cb->time_to_send = psched_get_time();
q->counter = 0;
- ret = q->qdisc->ops->requeue(skb, q->qdisc);
+
+ __skb_queue_head(&q->qdisc->q, skb);
+ q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
+ q->qdisc->qstats.requeues++;
+ ret = NET_XMIT_SUCCESS;
}
if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -245,20 +249,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return ret;
}
-/* Requeue packets but don't change time stamp */
-static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
- int ret;
-
- if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
- sch->q.qlen++;
- sch->qstats.requeues++;
- }
-
- return ret;
-}
-
static unsigned int netem_drop(struct Qdisc* sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -276,29 +266,25 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
- smp_mb();
if (sch->flags & TCQ_F_THROTTLED)
return NULL;
- skb = q->qdisc->dequeue(q->qdisc);
+ skb = q->qdisc->ops->peek(q->qdisc);
if (skb) {
const struct netem_skb_cb *cb = netem_skb_cb(skb);
psched_time_t now = psched_get_time();
/* if more time remaining? */
if (cb->time_to_send <= now) {
+ skb = qdisc_dequeue_peeked(q->qdisc);
+ if (unlikely(!skb))
+ return NULL;
+
pr_debug("netem_dequeue: return skb=%p\n", skb);
sch->q.qlen--;
return skb;
}
- if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
- qdisc_tree_decrease_qlen(q->qdisc, 1);
- sch->qstats.drops++;
- printk(KERN_ERR "netem: %s could not requeue\n",
- q->qdisc->ops->id);
- }
-
qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
}
@@ -341,14 +327,13 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
root_lock = qdisc_root_sleeping_lock(sch);
spin_lock_bh(root_lock);
- d = xchg(&q->delay_dist, d);
+ kfree(q->delay_dist);
+ q->delay_dist = d;
spin_unlock_bh(root_lock);
-
- kfree(d);
return 0;
}
-static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
+static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corr *c = nla_data(attr);
@@ -356,27 +341,24 @@ static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
init_crandom(&q->delay_cor, c->delay_corr);
init_crandom(&q->loss_cor, c->loss_corr);
init_crandom(&q->dup_cor, c->dup_corr);
- return 0;
}
-static int get_reorder(struct Qdisc *sch, const struct nlattr *attr)
+static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_reorder *r = nla_data(attr);
q->reorder = r->probability;
init_crandom(&q->reorder_cor, r->correlation);
- return 0;
}
-static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
+static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
struct netem_sched_data *q = qdisc_priv(sch);
const struct tc_netem_corrupt *r = nla_data(attr);
q->corrupt = r->probability;
init_crandom(&q->corrupt_cor, r->correlation);
- return 0;
}
static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
@@ -435,11 +417,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (q->gap)
q->reorder = ~0;
- if (tb[TCA_NETEM_CORR]) {
- ret = get_correlation(sch, tb[TCA_NETEM_CORR]);
- if (ret)
- return ret;
- }
+ if (tb[TCA_NETEM_CORR])
+ get_correlation(sch, tb[TCA_NETEM_CORR]);
if (tb[TCA_NETEM_DELAY_DIST]) {
ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
@@ -447,17 +426,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
return ret;
}
- if (tb[TCA_NETEM_REORDER]) {
- ret = get_reorder(sch, tb[TCA_NETEM_REORDER]);
- if (ret)
- return ret;
- }
+ if (tb[TCA_NETEM_REORDER])
+ get_reorder(sch, tb[TCA_NETEM_REORDER]);
- if (tb[TCA_NETEM_CORRUPT]) {
- ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
- if (ret)
- return ret;
- }
+ if (tb[TCA_NETEM_CORRUPT])
+ get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
return 0;
}
@@ -538,7 +511,7 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct fifo_sched_data),
.enqueue = tfifo_enqueue,
.dequeue = qdisc_dequeue_head,
- .requeue = qdisc_requeue,
+ .peek = qdisc_peek_head,
.drop = qdisc_queue_drop,
.init = tfifo_init,
.reset = qdisc_reset_queue,
@@ -621,99 +594,12 @@ nla_put_failure:
return -1;
}
-static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
-
- if (cl != 1) /* only one class */
- return -ENOENT;
-
- tcm->tcm_handle |= TC_H_MIN(1);
- tcm->tcm_info = q->qdisc->handle;
-
- return 0;
-}
-
-static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
-
- if (new == NULL)
- new = &noop_qdisc;
-
- sch_tree_lock(sch);
- *old = xchg(&q->qdisc, new);
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
- return 0;
-}
-
-static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
- return q->qdisc;
-}
-
-static unsigned long netem_get(struct Qdisc *sch, u32 classid)
-{
- return 1;
-}
-
-static void netem_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
-static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- struct nlattr **tca, unsigned long *arg)
-{
- return -ENOSYS;
-}
-
-static int netem_delete(struct Qdisc *sch, unsigned long arg)
-{
- return -ENOSYS;
-}
-
-static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
- if (!walker->stop) {
- if (walker->count >= walker->skip)
- if (walker->fn(sch, 1, walker) < 0) {
- walker->stop = 1;
- return;
- }
- walker->count++;
- }
-}
-
-static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
-{
- return NULL;
-}
-
-static const struct Qdisc_class_ops netem_class_ops = {
- .graft = netem_graft,
- .leaf = netem_leaf,
- .get = netem_get,
- .put = netem_put,
- .change = netem_change_class,
- .delete = netem_delete,
- .walk = netem_walk,
- .tcf_chain = netem_find_tcf,
- .dump = netem_dump_class,
-};
-
static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
.id = "netem",
- .cl_ops = &netem_class_ops,
.priv_size = sizeof(struct netem_sched_data),
.enqueue = netem_enqueue,
.dequeue = netem_dequeue,
- .requeue = netem_requeue,
+ .peek = qdisc_peek_dequeued,
.drop = netem_drop,
.init = netem_init,
.reset = netem_reset,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 504a78cdb718..94cecef70145 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -93,34 +93,20 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return ret;
}
-
-static int
-prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
+static struct sk_buff *prio_peek(struct Qdisc *sch)
{
- struct Qdisc *qdisc;
- int ret;
-
- qdisc = prio_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
- if (qdisc == NULL) {
- if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
- kfree_skb(skb);
- return ret;
- }
-#endif
+ struct prio_sched_data *q = qdisc_priv(sch);
+ int prio;
- if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
- sch->q.qlen++;
- sch->qstats.requeues++;
- return NET_XMIT_SUCCESS;
+ for (prio = 0; prio < q->bands; prio++) {
+ struct Qdisc *qdisc = q->queues[prio];
+ struct sk_buff *skb = qdisc->ops->peek(qdisc);
+ if (skb)
+ return skb;
}
- if (net_xmit_drop_count(ret))
- sch->qstats.drops++;
- return ret;
+ return NULL;
}
-
static struct sk_buff *prio_dequeue(struct Qdisc* sch)
{
struct prio_sched_data *q = qdisc_priv(sch);
@@ -201,7 +187,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
- struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+ struct Qdisc *child = q->queues[i];
+ q->queues[i] = &noop_qdisc;
if (child != &noop_qdisc) {
qdisc_tree_decrease_qlen(child, child->q.qlen);
qdisc_destroy(child);
@@ -211,18 +198,19 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
for (i=0; i<q->bands; i++) {
if (q->queues[i] == &noop_qdisc) {
- struct Qdisc *child;
+ struct Qdisc *child, *old;
child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
&pfifo_qdisc_ops,
TC_H_MAKE(sch->handle, i + 1));
if (child) {
sch_tree_lock(sch);
- child = xchg(&q->queues[i], child);
+ old = q->queues[i];
+ q->queues[i] = child;
- if (child != &noop_qdisc) {
- qdisc_tree_decrease_qlen(child,
- child->q.qlen);
- qdisc_destroy(child);
+ if (old != &noop_qdisc) {
+ qdisc_tree_decrease_qlen(old,
+ old->q.qlen);
+ qdisc_destroy(old);
}
sch_tree_unlock(sch);
}
@@ -421,7 +409,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct prio_sched_data),
.enqueue = prio_enqueue,
.dequeue = prio_dequeue,
- .requeue = prio_requeue,
+ .peek = prio_peek,
.drop = prio_drop,
.init = prio_init,
.reset = prio_reset,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 5da05839e225..2bdf241f6315 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -108,23 +108,6 @@ congestion_drop:
return NET_XMIT_CN;
}
-static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
- struct red_sched_data *q = qdisc_priv(sch);
- struct Qdisc *child = q->qdisc;
- int ret;
-
- if (red_is_idling(&q->parms))
- red_end_of_idle_period(&q->parms);
-
- ret = child->ops->requeue(skb, child);
- if (likely(ret == NET_XMIT_SUCCESS)) {
- sch->qstats.requeues++;
- sch->q.qlen++;
- }
- return ret;
-}
-
static struct sk_buff * red_dequeue(struct Qdisc* sch)
{
struct sk_buff *skb;
@@ -140,6 +123,14 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
return skb;
}
+static struct sk_buff * red_peek(struct Qdisc* sch)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *child = q->qdisc;
+
+ return child->ops->peek(child);
+}
+
static unsigned int red_drop(struct Qdisc* sch)
{
struct red_sched_data *q = qdisc_priv(sch);
@@ -211,7 +202,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
q->limit = ctl->limit;
if (child) {
qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
- qdisc_destroy(xchg(&q->qdisc, child));
+ qdisc_destroy(q->qdisc);
+ q->qdisc = child;
}
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
@@ -292,7 +284,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
sch_tree_lock(sch);
- *old = xchg(&q->qdisc, new);
+ *old = q->qdisc;
+ q->qdisc = new;
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch_tree_unlock(sch);
@@ -361,7 +354,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
.cl_ops = &red_class_ops,
.enqueue = red_enqueue,
.dequeue = red_dequeue,
- .requeue = red_requeue,
+ .peek = red_peek,
.drop = red_drop,
.init = red_init,
.reset = red_reset,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index fe1508ef0d3d..f3965df00559 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -281,7 +281,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct sfq_sched_data *q = qdisc_priv(sch);
unsigned int hash;
sfq_index x;
- int ret;
+ int uninitialized_var(ret);
hash = sfq_classify(skb, sch, &ret);
if (hash == 0) {
@@ -329,71 +329,20 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_CN;
}
-static int
-sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+static struct sk_buff *
+sfq_peek(struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- unsigned int hash;
- sfq_index x;
- int ret;
-
- hash = sfq_classify(skb, sch, &ret);
- if (hash == 0) {
- if (ret & __NET_XMIT_BYPASS)
- sch->qstats.drops++;
- kfree_skb(skb);
- return ret;
- }
- hash--;
+ sfq_index a;
- x = q->ht[hash];
- if (x == SFQ_DEPTH) {
- q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
- q->hash[x] = hash;
- }
-
- sch->qstats.backlog += qdisc_pkt_len(skb);
- __skb_queue_head(&q->qs[x], skb);
- /* If selected queue has length q->limit+1, this means that
- * all another queues are empty and we do simple tail drop.
- * This packet is still requeued at head of queue, tail packet
- * is dropped.
- */
- if (q->qs[x].qlen > q->limit) {
- skb = q->qs[x].prev;
- __skb_unlink(skb, &q->qs[x]);
- sch->qstats.drops++;
- sch->qstats.backlog -= qdisc_pkt_len(skb);
- kfree_skb(skb);
- return NET_XMIT_CN;
- }
-
- sfq_inc(q, x);
- if (q->qs[x].qlen == 1) { /* The flow is new */
- if (q->tail == SFQ_DEPTH) { /* It is the first flow */
- q->tail = x;
- q->next[x] = x;
- q->allot[x] = q->quantum;
- } else {
- q->next[x] = q->next[q->tail];
- q->next[q->tail] = x;
- q->tail = x;
- }
- }
-
- if (++sch->q.qlen <= q->limit) {
- sch->qstats.requeues++;
- return 0;
- }
+ /* No active slots */
+ if (q->tail == SFQ_DEPTH)
+ return NULL;
- sch->qstats.drops++;
- sfq_drop(sch);
- return NET_XMIT_CN;
+ a = q->next[q->tail];
+ return skb_peek(&q->qs[a]);
}
-
-
-
static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
@@ -624,7 +573,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct sfq_sched_data),
.enqueue = sfq_enqueue,
.dequeue = sfq_dequeue,
- .requeue = sfq_requeue,
+ .peek = sfq_peek,
.drop = sfq_drop,
.init = sfq_init,
.reset = sfq_reset,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 94c61598b86a..a2f93c09f3cc 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
return 0;
}
-static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
- struct tbf_sched_data *q = qdisc_priv(sch);
- int ret;
-
- if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
- sch->q.qlen++;
- sch->qstats.requeues++;
- }
-
- return ret;
-}
-
static unsigned int tbf_drop(struct Qdisc* sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -169,7 +156,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
struct tbf_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
- skb = q->qdisc->dequeue(q->qdisc);
+ skb = q->qdisc->ops->peek(q->qdisc);
if (skb) {
psched_time_t now;
@@ -192,6 +179,10 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
toks -= L2T(q, len);
if ((toks|ptoks) >= 0) {
+ skb = qdisc_dequeue_peeked(q->qdisc);
+ if (unlikely(!skb))
+ return NULL;
+
q->t_c = now;
q->tokens = toks;
q->ptokens = ptoks;
@@ -214,12 +205,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
(cf. CSZ, HPFQ, HFSC)
*/
- if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
- /* When requeue fails skb is dropped */
- qdisc_tree_decrease_qlen(q->qdisc, 1);
- sch->qstats.drops++;
- }
-
sch->qstats.overlimits++;
}
return NULL;
@@ -251,6 +236,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
struct tc_tbf_qopt *qopt;
struct qdisc_rate_table *rtab = NULL;
struct qdisc_rate_table *ptab = NULL;
+ struct qdisc_rate_table *tmp;
struct Qdisc *child = NULL;
int max_size,n;
@@ -299,7 +285,8 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
sch_tree_lock(sch);
if (child) {
qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
- qdisc_destroy(xchg(&q->qdisc, child));
+ qdisc_destroy(q->qdisc);
+ q->qdisc = child;
}
q->limit = qopt->limit;
q->mtu = qopt->mtu;
@@ -307,8 +294,14 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
q->buffer = qopt->buffer;
q->tokens = q->buffer;
q->ptokens = q->mtu;
- rtab = xchg(&q->R_tab, rtab);
- ptab = xchg(&q->P_tab, ptab);
+
+ tmp = q->R_tab;
+ q->R_tab = rtab;
+ rtab = tmp;
+
+ tmp = q->P_tab;
+ q->P_tab = ptab;
+ ptab = tmp;
sch_tree_unlock(sch);
err = 0;
done:
@@ -398,7 +391,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
sch_tree_lock(sch);
- *old = xchg(&q->qdisc, new);
+ *old = q->qdisc;
+ q->qdisc = new;
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch_tree_unlock(sch);
@@ -469,7 +463,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct tbf_sched_data),
.enqueue = tbf_enqueue,
.dequeue = tbf_dequeue,
- .requeue = tbf_requeue,
+ .peek = qdisc_peek_dequeued,
.drop = tbf_drop,
.init = tbf_init,
.reset = tbf_reset,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index d35ef059abb1..cfc8e7caba62 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
return NET_XMIT_DROP;
}
-static int
-teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
- struct teql_sched_data *q = qdisc_priv(sch);
-
- __skb_queue_head(&q->q, skb);
- sch->qstats.requeues++;
- return 0;
-}
-
static struct sk_buff *
teql_dequeue(struct Qdisc* sch)
{
@@ -123,6 +113,13 @@ teql_dequeue(struct Qdisc* sch)
return skb;
}
+static struct sk_buff *
+teql_peek(struct Qdisc* sch)
+{
+ /* teql is meant to be used as root qdisc */
+ return NULL;
+}
+
static __inline__ void
teql_neigh_release(struct neighbour *n)
{
@@ -433,7 +430,7 @@ static __init void teql_master_setup(struct net_device *dev)
ops->enqueue = teql_enqueue;
ops->dequeue = teql_dequeue;
- ops->requeue = teql_requeue;
+ ops->peek = teql_peek;
ops->init = teql_qdisc_init;
ops->reset = teql_reset;
ops->destroy = teql_destroy;