summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig14
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c16
-rw-r--r--net/sched/act_ctinfo.c19
-rw-r--r--net/sched/act_gate.c3
-rw-r--r--net/sched/act_mirred.c28
-rw-r--r--net/sched/act_tunnel_key.c8
-rw-r--r--net/sched/bpf_qdisc.c475
-rw-r--r--net/sched/cls_flow.c2
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/sch_api.c323
-rw-r--r--net/sched/sch_cake.c14
-rw-r--r--net/sched/sch_codel.c14
-rw-r--r--net/sched/sch_drr.c16
-rw-r--r--net/sched/sch_ets.c26
-rw-r--r--net/sched/sch_fq.c14
-rw-r--r--net/sched/sch_fq_codel.c14
-rw-r--r--net/sched/sch_fq_pie.c18
-rw-r--r--net/sched/sch_frag.c10
-rw-r--r--net/sched/sch_generic.c11
-rw-r--r--net/sched/sch_hfsc.c62
-rw-r--r--net/sched/sch_hhf.c14
-rw-r--r--net/sched/sch_htb.c19
-rw-r--r--net/sched/sch_mqprio.c2
-rw-r--r--net/sched/sch_netem.c40
-rw-r--r--net/sched/sch_pie.c18
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c55
-rw-r--r--net/sched/sch_red.c8
-rw-r--r--net/sched/sch_sfq.c21
-rw-r--r--net/sched/sch_taprio.c33
-rw-r--r--net/sched/sch_tbf.c2
32 files changed, 1001 insertions, 303 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 8180d0c12fce..ad914d2b2e22 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -403,6 +403,18 @@ config NET_SCH_ETS
If unsure, say N.
+config NET_SCH_BPF
+ bool "BPF-based Qdisc"
+ depends on BPF_SYSCALL && BPF_JIT && DEBUG_INFO_BTF
+ help
+ This option allows BPF-based queueing disiplines. With BPF struct_ops,
+ users can implement supported operators in Qdisc_ops using BPF programs.
+ The queue holding skb can be built with BPF maps or graphs.
+
+ Say Y here if you want to use BPF-based Qdisc.
+
+ If unsure, say N.
+
menuconfig NET_SCH_DEFAULT
bool "Allow override default queue discipline"
help
@@ -784,7 +796,7 @@ config NET_ACT_SKBEDIT
config NET_ACT_CSUM
tristate "Checksum Updating"
depends on NET_CLS_ACT && INET
- select LIBCRC32C
+ select NET_CRC32C
help
Say Y here to update some common checksum after some direct
packet alterations.
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 82c3f78ca486..904d784902d1 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_NET_SCH_FQ_PIE) += sch_fq_pie.o
obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o
obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o
+obj-$(CONFIG_NET_SCH_BPF) += bpf_qdisc.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 839790043256..057e20cef375 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1461,17 +1461,29 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
- struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
+ struct nlattr *tb[TCA_ACT_MAX_PRIO + 2];
struct tc_action *act;
size_t sz = 0;
int err;
int i;
- err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL,
+ err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO + 1, nla, NULL,
extack);
if (err < 0)
return err;
+ /* The nested attributes are parsed as types, but they are really an
+ * array of actions. So we parse one more than we can handle, and return
+ * an error if the last one is set (as that indicates that the request
+ * contained more than the maximum number of actions).
+ */
+ if (tb[TCA_ACT_MAX_PRIO + 1]) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "Only %d actions supported per filter",
+ TCA_ACT_MAX_PRIO);
+ return -EINVAL;
+ }
+
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 5b1241ddc758..93ab3bcd6d31 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -44,9 +44,9 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
ipv4_change_dsfield(ip_hdr(skb),
INET_ECN_MASK,
newdscp);
- ca->stats_dscp_set++;
+ atomic64_inc(&ca->stats_dscp_set);
} else {
- ca->stats_dscp_error++;
+ atomic64_inc(&ca->stats_dscp_error);
}
}
break;
@@ -57,9 +57,9 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
ipv6_change_dsfield(ipv6_hdr(skb),
INET_ECN_MASK,
newdscp);
- ca->stats_dscp_set++;
+ atomic64_inc(&ca->stats_dscp_set);
} else {
- ca->stats_dscp_error++;
+ atomic64_inc(&ca->stats_dscp_error);
}
}
break;
@@ -72,7 +72,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
struct tcf_ctinfo_params *cp,
struct sk_buff *skb)
{
- ca->stats_cpmark_set++;
+ atomic64_inc(&ca->stats_cpmark_set);
skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask;
}
@@ -323,15 +323,18 @@ static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a,
}
if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET,
- ci->stats_dscp_set, TCA_CTINFO_PAD))
+ atomic64_read(&ci->stats_dscp_set),
+ TCA_CTINFO_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR,
- ci->stats_dscp_error, TCA_CTINFO_PAD))
+ atomic64_read(&ci->stats_dscp_error),
+ TCA_CTINFO_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET,
- ci->stats_cpmark_set, TCA_CTINFO_PAD))
+ atomic64_read(&ci->stats_cpmark_set),
+ TCA_CTINFO_PAD))
goto nla_put_failure;
spin_unlock_bh(&ci->tcf_lock);
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 91c0ec729823..c1f75f272757 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -287,8 +287,7 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
gact->param.tcfg_basetime = basetime;
gact->param.tcfg_clockid = clockid;
gact->tk_offset = tko;
- hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
- gact->hitimer.function = gate_timer_func;
+ hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT);
}
static int tcf_gate_init(struct net *net, struct nlattr *nla,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5b3814365924..5f01f567c934 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,7 +30,29 @@ static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
#define MIRRED_NEST_LIMIT 4
-static DEFINE_PER_CPU(unsigned int, mirred_nest_level);
+
+#ifndef CONFIG_PREEMPT_RT
+static u8 tcf_mirred_nest_level_inc_return(void)
+{
+ return __this_cpu_inc_return(softnet_data.xmit.sched_mirred_nest);
+}
+
+static void tcf_mirred_nest_level_dec(void)
+{
+ __this_cpu_dec(softnet_data.xmit.sched_mirred_nest);
+}
+
+#else
+static u8 tcf_mirred_nest_level_inc_return(void)
+{
+ return current->net_xmit.sched_mirred_nest++;
+}
+
+static void tcf_mirred_nest_level_dec(void)
+{
+ current->net_xmit.sched_mirred_nest--;
+}
+#endif
static bool tcf_mirred_is_act_redirect(int action)
{
@@ -423,7 +445,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
int m_eaction;
u32 blockid;
- nest_level = __this_cpu_inc_return(mirred_nest_level);
+ nest_level = tcf_mirred_nest_level_inc_return();
if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
netdev_name(skb->dev));
@@ -454,7 +476,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
retval);
dec_nest_level:
- __this_cpu_dec(mirred_nest_level);
+ tcf_mirred_nest_level_dec();
return retval;
}
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index e296714803dc..2cef4b08befb 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -571,8 +571,8 @@ static void tunnel_key_release(struct tc_action *a)
static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
+ const u8 *src = ip_tunnel_info_opts(info);
int len = info->options_len;
- u8 *src = (u8 *)(info + 1);
struct nlattr *start;
start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
@@ -580,7 +580,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
return -EMSGSIZE;
while (len > 0) {
- struct geneve_opt *opt = (struct geneve_opt *)src;
+ const struct geneve_opt *opt = (const struct geneve_opt *)src;
if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,
opt->opt_class) ||
@@ -603,7 +603,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
- struct vxlan_metadata *md = (struct vxlan_metadata *)(info + 1);
+ const struct vxlan_metadata *md = ip_tunnel_info_opts(info);
struct nlattr *start;
start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_VXLAN);
@@ -622,7 +622,7 @@ static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb,
static int tunnel_key_erspan_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
- struct erspan_metadata *md = (struct erspan_metadata *)(info + 1);
+ const struct erspan_metadata *md = ip_tunnel_info_opts(info);
struct nlattr *start;
start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN);
diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c
new file mode 100644
index 000000000000..7ea8b54b2ab1
--- /dev/null
+++ b/net/sched/bpf_qdisc.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+
+#define QDISC_OP_IDX(op) (offsetof(struct Qdisc_ops, op) / sizeof(void (*)(void)))
+#define QDISC_MOFF_IDX(moff) (moff / sizeof(void (*)(void)))
+
+static struct bpf_struct_ops bpf_Qdisc_ops;
+
+struct bpf_sched_data {
+ struct qdisc_watchdog watchdog;
+};
+
+struct bpf_sk_buff_ptr {
+ struct sk_buff *skb;
+};
+
+static int bpf_qdisc_init(struct btf *btf)
+{
+ return 0;
+}
+
+BTF_ID_LIST_SINGLE(bpf_qdisc_ids, struct, Qdisc)
+BTF_ID_LIST_SINGLE(bpf_sk_buff_ids, struct, sk_buff)
+BTF_ID_LIST_SINGLE(bpf_sk_buff_ptr_ids, struct, bpf_sk_buff_ptr)
+
+static bool bpf_qdisc_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ struct btf *btf = prog->aux->attach_btf;
+ u32 arg;
+
+ arg = btf_ctx_arg_idx(btf, prog->aux->attach_func_proto, off);
+ if (prog->aux->attach_st_ops_member_off == offsetof(struct Qdisc_ops, enqueue)) {
+ if (arg == 2 && type == BPF_READ) {
+ info->reg_type = PTR_TO_BTF_ID | PTR_TRUSTED;
+ info->btf = btf;
+ info->btf_id = bpf_sk_buff_ptr_ids[0];
+ return true;
+ }
+ }
+
+ return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
+}
+
+static int bpf_qdisc_qdisc_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, size_t *end)
+{
+ switch (off) {
+ case offsetof(struct Qdisc, limit):
+ *end = offsetofend(struct Qdisc, limit);
+ break;
+ case offsetof(struct Qdisc, q) + offsetof(struct qdisc_skb_head, qlen):
+ *end = offsetof(struct Qdisc, q) + offsetofend(struct qdisc_skb_head, qlen);
+ break;
+ case offsetof(struct Qdisc, qstats) ... offsetofend(struct Qdisc, qstats) - 1:
+ *end = offsetofend(struct Qdisc, qstats);
+ break;
+ default:
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static int bpf_qdisc_sk_buff_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, size_t *end)
+{
+ switch (off) {
+ case offsetof(struct sk_buff, tstamp):
+ *end = offsetofend(struct sk_buff, tstamp);
+ break;
+ case offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, data[0]) ...
+ offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb,
+ data[QDISC_CB_PRIV_LEN - 1]):
+ *end = offsetof(struct sk_buff, cb) +
+ offsetofend(struct qdisc_skb_cb, data[QDISC_CB_PRIV_LEN - 1]);
+ break;
+ default:
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static int bpf_qdisc_btf_struct_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size)
+{
+ const struct btf_type *t, *skbt, *qdisct;
+ size_t end;
+ int err;
+
+ skbt = btf_type_by_id(reg->btf, bpf_sk_buff_ids[0]);
+ qdisct = btf_type_by_id(reg->btf, bpf_qdisc_ids[0]);
+ t = btf_type_by_id(reg->btf, reg->btf_id);
+
+ if (t == skbt) {
+ err = bpf_qdisc_sk_buff_access(log, reg, off, &end);
+ } else if (t == qdisct) {
+ err = bpf_qdisc_qdisc_access(log, reg, off, &end);
+ } else {
+ bpf_log(log, "only read is supported\n");
+ return -EACCES;
+ }
+
+ if (err) {
+ bpf_log(log, "no write support to %s at off %d\n",
+ btf_name_by_offset(reg->btf, t->name_off), off);
+ return -EACCES;
+ }
+
+ if (off + size > end) {
+ bpf_log(log,
+ "write access at off %d with size %d beyond the member of %s ended at %zu\n",
+ off, size, btf_name_by_offset(reg->btf, t->name_off), end);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+BTF_ID_LIST(bpf_qdisc_init_prologue_ids)
+BTF_ID(func, bpf_qdisc_init_prologue)
+
+static int bpf_qdisc_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ if (prog->aux->attach_st_ops_member_off != offsetof(struct Qdisc_ops, init))
+ return 0;
+
+ /* r6 = r1; // r6 will be "u64 *ctx". r1 is "u64 *ctx".
+ * r2 = r1[16]; // r2 will be "struct netlink_ext_ack *extack"
+ * r1 = r1[0]; // r1 will be "struct Qdisc *sch"
+ * r0 = bpf_qdisc_init_prologue(r1, r2);
+ * if r0 == 0 goto pc+1;
+ * BPF_EXIT;
+ * r1 = r6; // r1 will be "u64 *ctx".
+ */
+ *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 16);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_qdisc_init_prologue_ids[0]);
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1);
+ *insn++ = BPF_EXIT_INSN();
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+ *insn++ = prog->insnsi[0];
+
+ return insn - insn_buf;
+}
+
+BTF_ID_LIST(bpf_qdisc_reset_destroy_epilogue_ids)
+BTF_ID(func, bpf_qdisc_reset_destroy_epilogue)
+
+static int bpf_qdisc_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
+ s16 ctx_stack_off)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ if (prog->aux->attach_st_ops_member_off != offsetof(struct Qdisc_ops, reset) &&
+ prog->aux->attach_st_ops_member_off != offsetof(struct Qdisc_ops, destroy))
+ return 0;
+
+ /* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
+ * r1 = r1[0]; // r1 will be "struct Qdisc *sch"
+ * r0 = bpf_qdisc_reset_destroy_epilogue(r1);
+ * BPF_EXIT;
+ */
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_qdisc_reset_destroy_epilogue_ids[0]);
+ *insn++ = BPF_EXIT_INSN();
+
+ return insn - insn_buf;
+}
+
+__bpf_kfunc_start_defs();
+
+/* bpf_skb_get_hash - Get the flow hash of an skb.
+ * @skb: The skb to get the flow hash from.
+ */
+__bpf_kfunc u32 bpf_skb_get_hash(struct sk_buff *skb)
+{
+ return skb_get_hash(skb);
+}
+
+/* bpf_kfree_skb - Release an skb's reference and drop it immediately.
+ * @skb: The skb whose reference to be released and dropped.
+ */
+__bpf_kfunc void bpf_kfree_skb(struct sk_buff *skb)
+{
+ kfree_skb(skb);
+}
+
+/* bpf_qdisc_skb_drop - Drop an skb by adding it to a deferred free list.
+ * @skb: The skb whose reference to be released and dropped.
+ * @to_free_list: The list of skbs to be dropped.
+ */
+__bpf_kfunc void bpf_qdisc_skb_drop(struct sk_buff *skb,
+ struct bpf_sk_buff_ptr *to_free_list)
+{
+ __qdisc_drop(skb, (struct sk_buff **)to_free_list);
+}
+
+/* bpf_qdisc_watchdog_schedule - Schedule a qdisc to a later time using a timer.
+ * @sch: The qdisc to be scheduled.
+ * @expire: The expiry time of the timer.
+ * @delta_ns: The slack range of the timer.
+ */
+__bpf_kfunc void bpf_qdisc_watchdog_schedule(struct Qdisc *sch, u64 expire, u64 delta_ns)
+{
+ struct bpf_sched_data *q = qdisc_priv(sch);
+
+ qdisc_watchdog_schedule_range_ns(&q->watchdog, expire, delta_ns);
+}
+
+/* bpf_qdisc_init_prologue - Hidden kfunc called in prologue of .init. */
+__bpf_kfunc int bpf_qdisc_init_prologue(struct Qdisc *sch,
+ struct netlink_ext_ack *extack)
+{
+ struct bpf_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct Qdisc *p;
+
+ qdisc_watchdog_init(&q->watchdog, sch);
+
+ if (sch->parent != TC_H_ROOT) {
+ /* If qdisc_lookup() returns NULL, it means .init is called by
+ * qdisc_create_dflt() in mq/mqprio_init and the parent qdisc
+ * has not been added to qdisc_hash yet.
+ */
+ p = qdisc_lookup(dev, TC_H_MAJ(sch->parent));
+ if (p && !(p->flags & TCQ_F_MQROOT)) {
+ NL_SET_ERR_MSG(extack, "BPF qdisc only supported on root or mq");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/* bpf_qdisc_reset_destroy_epilogue - Hidden kfunc called in epilogue of .reset
+ * and .destroy
+ */
+__bpf_kfunc void bpf_qdisc_reset_destroy_epilogue(struct Qdisc *sch)
+{
+ struct bpf_sched_data *q = qdisc_priv(sch);
+
+ qdisc_watchdog_cancel(&q->watchdog);
+}
+
+/* bpf_qdisc_bstats_update - Update Qdisc basic statistics
+ * @sch: The qdisc from which an skb is dequeued.
+ * @skb: The skb to be dequeued.
+ */
+__bpf_kfunc void bpf_qdisc_bstats_update(struct Qdisc *sch, const struct sk_buff *skb)
+{
+ bstats_update(&sch->bstats, skb);
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(qdisc_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_skb_get_hash, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfree_skb, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_qdisc_skb_drop, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_qdisc_watchdog_schedule, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_qdisc_init_prologue, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_qdisc_reset_destroy_epilogue, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_qdisc_bstats_update, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(qdisc_kfunc_ids)
+
+BTF_SET_START(qdisc_common_kfunc_set)
+BTF_ID(func, bpf_skb_get_hash)
+BTF_ID(func, bpf_kfree_skb)
+BTF_ID(func, bpf_dynptr_from_skb)
+BTF_SET_END(qdisc_common_kfunc_set)
+
+BTF_SET_START(qdisc_enqueue_kfunc_set)
+BTF_ID(func, bpf_qdisc_skb_drop)
+BTF_ID(func, bpf_qdisc_watchdog_schedule)
+BTF_SET_END(qdisc_enqueue_kfunc_set)
+
+BTF_SET_START(qdisc_dequeue_kfunc_set)
+BTF_ID(func, bpf_qdisc_watchdog_schedule)
+BTF_ID(func, bpf_qdisc_bstats_update)
+BTF_SET_END(qdisc_dequeue_kfunc_set)
+
+enum qdisc_ops_kf_flags {
+ QDISC_OPS_KF_COMMON = 0,
+ QDISC_OPS_KF_ENQUEUE = 1 << 0,
+ QDISC_OPS_KF_DEQUEUE = 1 << 1,
+};
+
+static const u32 qdisc_ops_context_flags[] = {
+ [QDISC_OP_IDX(enqueue)] = QDISC_OPS_KF_ENQUEUE,
+ [QDISC_OP_IDX(dequeue)] = QDISC_OPS_KF_DEQUEUE,
+ [QDISC_OP_IDX(init)] = QDISC_OPS_KF_COMMON,
+ [QDISC_OP_IDX(reset)] = QDISC_OPS_KF_COMMON,
+ [QDISC_OP_IDX(destroy)] = QDISC_OPS_KF_COMMON,
+};
+
+static int bpf_qdisc_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+ u32 moff, flags;
+
+ if (!btf_id_set8_contains(&qdisc_kfunc_ids, kfunc_id))
+ return 0;
+
+ if (prog->aux->st_ops != &bpf_Qdisc_ops)
+ return -EACCES;
+
+ moff = prog->aux->attach_st_ops_member_off;
+ flags = qdisc_ops_context_flags[QDISC_MOFF_IDX(moff)];
+
+ if ((flags & QDISC_OPS_KF_ENQUEUE) &&
+ btf_id_set_contains(&qdisc_enqueue_kfunc_set, kfunc_id))
+ return 0;
+
+ if ((flags & QDISC_OPS_KF_DEQUEUE) &&
+ btf_id_set_contains(&qdisc_dequeue_kfunc_set, kfunc_id))
+ return 0;
+
+ if (btf_id_set_contains(&qdisc_common_kfunc_set, kfunc_id))
+ return 0;
+
+ return -EACCES;
+}
+
+static const struct btf_kfunc_id_set bpf_qdisc_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &qdisc_kfunc_ids,
+ .filter = bpf_qdisc_kfunc_filter,
+};
+
+static const struct bpf_verifier_ops bpf_qdisc_verifier_ops = {
+ .get_func_proto = bpf_base_func_proto,
+ .is_valid_access = bpf_qdisc_is_valid_access,
+ .btf_struct_access = bpf_qdisc_btf_struct_access,
+ .gen_prologue = bpf_qdisc_gen_prologue,
+ .gen_epilogue = bpf_qdisc_gen_epilogue,
+};
+
+static int bpf_qdisc_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ const struct Qdisc_ops *uqdisc_ops;
+ struct Qdisc_ops *qdisc_ops;
+ u32 moff;
+
+ uqdisc_ops = (const struct Qdisc_ops *)udata;
+ qdisc_ops = (struct Qdisc_ops *)kdata;
+
+ moff = __btf_member_bit_offset(t, member) / 8;
+ switch (moff) {
+ case offsetof(struct Qdisc_ops, priv_size):
+ if (uqdisc_ops->priv_size)
+ return -EINVAL;
+ qdisc_ops->priv_size = sizeof(struct bpf_sched_data);
+ return 1;
+ case offsetof(struct Qdisc_ops, peek):
+ qdisc_ops->peek = qdisc_peek_dequeued;
+ return 0;
+ case offsetof(struct Qdisc_ops, id):
+ if (bpf_obj_name_cpy(qdisc_ops->id, uqdisc_ops->id,
+ sizeof(qdisc_ops->id)) <= 0)
+ return -EINVAL;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bpf_qdisc_reg(void *kdata, struct bpf_link *link)
+{
+ return register_qdisc(kdata);
+}
+
+static void bpf_qdisc_unreg(void *kdata, struct bpf_link *link)
+{
+ return unregister_qdisc(kdata);
+}
+
+static int bpf_qdisc_validate(void *kdata)
+{
+ struct Qdisc_ops *ops = (struct Qdisc_ops *)kdata;
+
+ if (!ops->enqueue || !ops->dequeue || !ops->init ||
+ !ops->reset || !ops->destroy)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int Qdisc_ops__enqueue(struct sk_buff *skb__ref, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ return 0;
+}
+
+static struct sk_buff *Qdisc_ops__dequeue(struct Qdisc *sch)
+{
+ return NULL;
+}
+
+static int Qdisc_ops__init(struct Qdisc *sch, struct nlattr *arg,
+ struct netlink_ext_ack *extack)
+{
+ return 0;
+}
+
+static void Qdisc_ops__reset(struct Qdisc *sch)
+{
+}
+
+static void Qdisc_ops__destroy(struct Qdisc *sch)
+{
+}
+
+static struct Qdisc_ops __bpf_ops_qdisc_ops = {
+ .enqueue = Qdisc_ops__enqueue,
+ .dequeue = Qdisc_ops__dequeue,
+ .init = Qdisc_ops__init,
+ .reset = Qdisc_ops__reset,
+ .destroy = Qdisc_ops__destroy,
+};
+
+static struct bpf_struct_ops bpf_Qdisc_ops = {
+ .verifier_ops = &bpf_qdisc_verifier_ops,
+ .reg = bpf_qdisc_reg,
+ .unreg = bpf_qdisc_unreg,
+ .validate = bpf_qdisc_validate,
+ .init_member = bpf_qdisc_init_member,
+ .init = bpf_qdisc_init,
+ .name = "Qdisc_ops",
+ .cfi_stubs = &__bpf_ops_qdisc_ops,
+ .owner = THIS_MODULE,
+};
+
+BTF_ID_LIST(bpf_sk_buff_dtor_ids)
+BTF_ID(func, bpf_kfree_skb)
+
+static int __init bpf_qdisc_kfunc_init(void)
+{
+ int ret;
+ const struct btf_id_dtor_kfunc skb_kfunc_dtors[] = {
+ {
+ .btf_id = bpf_sk_buff_ids[0],
+ .kfunc_btf_id = bpf_sk_buff_dtor_ids[0]
+ },
+ };
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_qdisc_kfunc_set);
+ ret = ret ?: register_btf_id_dtor_kfuncs(skb_kfunc_dtors,
+ ARRAY_SIZE(skb_kfunc_dtors),
+ THIS_MODULE);
+ ret = ret ?: register_bpf_struct_ops(&bpf_Qdisc_ops, Qdisc_ops);
+
+ return ret;
+}
+late_initcall(bpf_qdisc_kfunc_init);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 5c2580a07530..5693b41b093f 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -345,7 +345,7 @@ TC_INDIRECT_SCOPE int flow_classify(struct sk_buff *skb,
static void flow_perturbation(struct timer_list *t)
{
- struct flow_filter *f = from_timer(f, t, perturb_timer);
+ struct flow_filter *f = timer_container_of(f, t, perturb_timer);
get_random_bytes(&f->hashrnd, 4);
if (f->perturb_period)
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 8996c73c9779..3f2e707a11d1 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -460,7 +460,7 @@ META_COLLECTOR(int_sk_fwd_alloc)
*err = -1;
return;
}
- dst->value = sk_forward_alloc_get(sk);
+ dst->value = READ_ONCE(sk->sk_forward_alloc);
}
META_COLLECTOR(int_sk_sndbuf)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6c625dcd0651..d7c767b861a4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -25,7 +25,9 @@
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
+#include <linux/bpf.h>
+#include <net/netdev_lock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
@@ -206,7 +208,7 @@ static struct Qdisc_ops *qdisc_lookup_default(const char *name)
for (q = qdisc_base; q; q = q->next) {
if (!strcmp(name, q->id)) {
- if (!try_module_get(q->owner))
+ if (!bpf_try_module_get(q, q->owner))
q = NULL;
break;
}
@@ -236,7 +238,7 @@ int qdisc_set_default(const char *name)
if (ops) {
/* Set new default */
- module_put(default_qdisc_ops->owner);
+ bpf_module_put(default_qdisc_ops, default_qdisc_ops->owner);
default_qdisc_ops = ops;
}
write_unlock(&qdisc_mod_lock);
@@ -334,17 +336,22 @@ out:
return q;
}
-static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
+static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
+ struct netlink_ext_ack *extack)
{
unsigned long cl;
const struct Qdisc_class_ops *cops = p->ops->cl_ops;
- if (cops == NULL)
- return NULL;
+ if (cops == NULL) {
+ NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
cl = cops->find(p, classid);
- if (cl == 0)
- return NULL;
+ if (cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class not found");
+ return ERR_PTR(-ENOENT);
+ }
return cops->leaf(p, cl);
}
@@ -358,7 +365,7 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
read_lock(&qdisc_mod_lock);
for (q = qdisc_base; q; q = q->next) {
if (nla_strcmp(kind, q->id) == 0) {
- if (!try_module_get(q->owner))
+ if (!bpf_try_module_get(q, q->owner))
q = NULL;
break;
}
@@ -594,16 +601,6 @@ out:
qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
-void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
-{
- if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
- pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
- txt, qdisc->ops->id, qdisc->handle >> 16);
- qdisc->flags |= TCQ_F_WARN_NONWC;
- }
-}
-EXPORT_SYMBOL(qdisc_warn_nonwc);
-
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
@@ -619,8 +616,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
clockid_t clockid)
{
- hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
- wd->timer.function = qdisc_watchdog;
+ hrtimer_setup(&wd->timer, qdisc_watchdog, clockid, HRTIMER_MODE_ABS_PINNED);
wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
@@ -779,15 +775,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
- bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
bool notify;
int drops;
- if (n == 0 && len == 0)
- return;
drops = max_t(int, n, 0);
rcu_read_lock();
while ((parentid = sch->parent)) {
@@ -796,17 +789,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
if (sch->flags & TCQ_F_NOPARENT)
break;
- /* Notify parent qdisc only if child qdisc becomes empty.
- *
- * If child was empty even before update then backlog
- * counter is screwed and we skip notification because
- * parent class is already passive.
- *
- * If the original child was offloaded then it is allowed
- * to be seem as empty, so the parent is notified anyway.
- */
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
- !qdisc_is_offloaded);
+ /* Notify parent qdisc only if child qdisc becomes empty. */
+ notify = !sch->q.qlen;
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
@@ -815,6 +799,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
}
cops = sch->ops->cl_ops;
if (notify && cops->qlen_notify) {
+ /* Note that qlen_notify must be idempotent as it may get called
+ * multiple times.
+ */
cl = cops->find(sch, parentid);
cops->qlen_notify(sch, cl);
}
@@ -1267,36 +1254,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
struct qdisc_size_table *stab;
ops = qdisc_lookup_ops(kind);
-#ifdef CONFIG_MODULES
- if (ops == NULL && kind != NULL) {
- char name[IFNAMSIZ];
- if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
- /* We dropped the RTNL semaphore in order to
- * perform the module load. So, even if we
- * succeeded in loading the module we have to
- * tell the caller to replay the request. We
- * indicate this using -EAGAIN.
- * We replay the request because the device may
- * go away in the mean time.
- */
- rtnl_unlock();
- request_module(NET_SCH_ALIAS_PREFIX "%s", name);
- rtnl_lock();
- ops = qdisc_lookup_ops(kind);
- if (ops != NULL) {
- /* We will try again qdisc_lookup_ops,
- * so don't keep a reference.
- */
- module_put(ops->owner);
- err = -EAGAIN;
- goto err_out;
- }
- }
- }
-#endif
-
- err = -ENOENT;
if (!ops) {
+ err = -ENOENT;
NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
goto err_out;
}
@@ -1398,7 +1357,7 @@ err_out3:
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
err_out2:
- module_put(ops->owner);
+ bpf_module_put(ops, ops->owner);
err_out:
*errp = err;
return NULL;
@@ -1505,27 +1464,18 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
* Delete/get qdisc.
*/
-static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
- struct netlink_ext_ack *extack)
+static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack,
+ struct net_device *dev,
+ struct nlattr *tca[TCA_MAX + 1],
+ struct tcmsg *tcm)
{
struct net *net = sock_net(skb->sk);
- struct tcmsg *tcm = nlmsg_data(n);
- struct nlattr *tca[TCA_MAX + 1];
- struct net_device *dev;
- u32 clid;
struct Qdisc *q = NULL;
struct Qdisc *p = NULL;
+ u32 clid;
int err;
- err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
- rtm_tca_policy, extack);
- if (err < 0)
- return err;
-
- dev = __dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return -ENODEV;
-
clid = tcm->tcm_parent;
if (clid) {
if (clid != TC_H_ROOT) {
@@ -1535,7 +1485,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ q = qdisc_leaf(p, clid, extack);
} else if (dev_ingress_queue(dev)) {
q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
@@ -1546,6 +1496,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
return -ENOENT;
}
+ if (IS_ERR(q))
+ return PTR_ERR(q);
if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
NL_SET_ERR_MSG(extack, "Invalid handle");
@@ -1582,6 +1534,31 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
return 0;
}
+static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tcmsg *tcm = nlmsg_data(n);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
+ rtm_tca_policy, extack);
+ if (err < 0)
+ return err;
+
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ netdev_lock_ops(dev);
+ err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm);
+ netdev_unlock_ops(dev);
+
+ return err;
+}
+
static bool req_create_or_replace(struct nlmsghdr *n)
{
return (n->nlmsg_flags & NLM_F_CREATE &&
@@ -1601,35 +1578,18 @@ static bool req_change(struct nlmsghdr *n)
!(n->nlmsg_flags & NLM_F_EXCL));
}
-/*
- * Create/change qdisc.
- */
-static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
- struct netlink_ext_ack *extack)
+static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack,
+ struct net_device *dev,
+ struct nlattr *tca[TCA_MAX + 1],
+ struct tcmsg *tcm)
{
- struct net *net = sock_net(skb->sk);
- struct tcmsg *tcm;
- struct nlattr *tca[TCA_MAX + 1];
- struct net_device *dev;
+ struct Qdisc *q = NULL;
+ struct Qdisc *p = NULL;
u32 clid;
- struct Qdisc *q, *p;
int err;
-replay:
- /* Reinit, just in case something touches this. */
- err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
- rtm_tca_policy, extack);
- if (err < 0)
- return err;
-
- tcm = nlmsg_data(n);
clid = tcm->tcm_parent;
- q = p = NULL;
-
- dev = __dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return -ENODEV;
-
if (clid) {
if (clid != TC_H_ROOT) {
@@ -1639,7 +1599,9 @@ replay:
NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ q = qdisc_leaf(p, clid, extack);
+ if (IS_ERR(q))
+ return PTR_ERR(q);
} else if (dev_ingress_queue_create(dev)) {
q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
@@ -1755,7 +1717,7 @@ replay:
}
err = qdisc_change(q, tca, extack);
if (err == 0)
- qdisc_notify(net, skb, n, clid, NULL, q, extack);
+ qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack);
return err;
create_n_graft:
@@ -1787,11 +1749,8 @@ create_n_graft2:
tcm->tcm_parent, tcm->tcm_handle,
tca, &err, extack);
}
- if (q == NULL) {
- if (err == -EAGAIN)
- goto replay;
+ if (!q)
return err;
- }
graft:
err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
@@ -1804,6 +1763,58 @@ graft:
return 0;
}
+static void request_qdisc_module(struct nlattr *kind)
+{
+ struct Qdisc_ops *ops;
+ char name[IFNAMSIZ];
+
+ if (!kind)
+ return;
+
+ ops = qdisc_lookup_ops(kind);
+ if (ops) {
+ bpf_module_put(ops, ops->owner);
+ return;
+ }
+
+ if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
+ rtnl_unlock();
+ request_module(NET_SCH_ALIAS_PREFIX "%s", name);
+ rtnl_lock();
+ }
+}
+
+/*
+ * Create/change qdisc.
+ */
+static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct net_device *dev;
+ struct tcmsg *tcm;
+ int err;
+
+ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
+ rtm_tca_policy, extack);
+ if (err < 0)
+ return err;
+
+ request_qdisc_module(tca[TCA_KIND]);
+
+ tcm = nlmsg_data(n);
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ netdev_lock_ops(dev);
+ err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm);
+ netdev_unlock_ops(dev);
+
+ return err;
+}
+
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
int *q_idx_p, int s_q_idx, bool recur,
@@ -1888,17 +1899,23 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = 0;
q_idx = 0;
+ netdev_lock_ops(dev);
if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
skb, cb, &q_idx, s_q_idx,
- true, tca[TCA_DUMP_INVISIBLE]) < 0)
+ true, tca[TCA_DUMP_INVISIBLE]) < 0) {
+ netdev_unlock_ops(dev);
goto done;
+ }
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
skb, cb, &q_idx, s_q_idx, false,
- tca[TCA_DUMP_INVISIBLE]) < 0)
+ tca[TCA_DUMP_INVISIBLE]) < 0) {
+ netdev_unlock_ops(dev);
goto done;
+ }
+ netdev_unlock_ops(dev);
cont:
idx++;
@@ -2135,15 +2152,15 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
#endif
-static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
- struct netlink_ext_ack *extack)
+static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack,
+ struct net_device *dev,
+ struct nlattr *tca[TCA_MAX + 1],
+ struct tcmsg *tcm)
{
struct net *net = sock_net(skb->sk);
- struct tcmsg *tcm = nlmsg_data(n);
- struct nlattr *tca[TCA_MAX + 1];
- struct net_device *dev;
- struct Qdisc *q = NULL;
const struct Qdisc_class_ops *cops;
+ struct Qdisc *q = NULL;
unsigned long cl = 0;
unsigned long new_cl;
u32 portid;
@@ -2151,15 +2168,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
u32 qid;
int err;
- err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
- rtm_tca_policy, extack);
- if (err < 0)
- return err;
-
- dev = __dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return -ENODEV;
-
/*
parent == TC_H_UNSPEC - unspecified parent.
parent == TC_H_ROOT - class is root, which has no parent.
@@ -2274,6 +2282,31 @@ out:
return err;
}
+static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tcmsg *tcm = nlmsg_data(n);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
+ rtm_tca_policy, extack);
+ if (err < 0)
+ return err;
+
+ dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ netdev_lock_ops(dev);
+ err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm);
+ netdev_unlock_ops(dev);
+
+ return err;
+}
+
struct qdisc_dump_args {
struct qdisc_walker w;
struct sk_buff *skb;
@@ -2350,20 +2383,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
return 0;
}
-static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb,
+ struct tcmsg *tcm, struct net_device *dev)
{
- struct tcmsg *tcm = nlmsg_data(cb->nlh);
- struct net *net = sock_net(skb->sk);
struct netdev_queue *dev_queue;
- struct net_device *dev;
int t, s_t;
- if (nlmsg_len(cb->nlh) < sizeof(*tcm))
- return 0;
- dev = dev_get_by_index(net, tcm->tcm_ifindex);
- if (!dev)
- return 0;
-
s_t = cb->args[0];
t = 0;
@@ -2380,10 +2405,32 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
done:
cb->args[0] = t;
- dev_put(dev);
return skb->len;
}
+static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int err;
+
+ if (nlmsg_len(cb->nlh) < sizeof(*tcm))
+ return 0;
+
+ dev = dev_get_by_index(net, tcm->tcm_ifindex);
+ if (!dev)
+ return 0;
+
+ netdev_lock_ops(dev);
+ err = __tc_dump_tclass(skb, cb, tcm, dev);
+ netdev_unlock_ops(dev);
+
+ dev_put(dev);
+
+ return err;
+}
+
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 48dd8c88903f..aa9f31e4415a 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1747,7 +1747,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
ktime_t now = ktime_get();
struct cake_tin_data *b;
struct cake_flow *flow;
- u32 idx;
+ u32 idx, tin;
/* choose flow to insert into */
idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
@@ -1757,6 +1757,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
__qdisc_drop(skb, to_free);
return ret;
}
+ tin = (u32)(b - q->tins);
idx--;
flow = &b->flows[idx];
@@ -1924,13 +1925,22 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
q->buffer_max_used = q->buffer_used;
if (q->buffer_used > q->buffer_limit) {
+ bool same_flow = false;
u32 dropped = 0;
+ u32 drop_id;
while (q->buffer_used > q->buffer_limit) {
dropped++;
- cake_drop(sch, to_free);
+ drop_id = cake_drop(sch, to_free);
+
+ if ((drop_id >> 16) == tin &&
+ (drop_id & 0xFFFF) == idx)
+ same_flow = true;
}
b->drop_overlimit += dropped;
+
+ if (same_flow)
+ return NET_XMIT_CN;
}
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 12dd71139da3..fa0314679e43 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -101,9 +101,9 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
static int codel_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ unsigned int dropped_pkts = 0, dropped_bytes = 0;
struct codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CODEL_MAX + 1];
- unsigned int qlen, dropped = 0;
int err;
err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt,
@@ -142,15 +142,17 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
WRITE_ONCE(q->params.ecn,
!!nla_get_u32(tb[TCA_CODEL_ECN]));
- qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
- dropped += qdisc_pkt_len(skb);
- qdisc_qstats_backlog_dec(sch, skb);
+ if (!skb)
+ break;
+
+ dropped_pkts++;
+ dropped_bytes += qdisc_pkt_len(skb);
rtnl_qdisc_drop(skb, sch);
}
- qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
+ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index c69b999fae17..9b6d79bd8737 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -35,6 +35,11 @@ struct drr_sched {
struct Qdisc_class_hash clhash;
};
+static bool cl_is_active(struct drr_class *cl)
+{
+ return !list_empty(&cl->alist);
+}
+
static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
{
struct drr_sched *q = qdisc_priv(sch);
@@ -105,6 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return -ENOBUFS;
gnet_stats_basic_sync_init(&cl->bstats);
+ INIT_LIST_HEAD(&cl->alist);
cl->common.classid = classid;
cl->quantum = quantum;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -229,7 +235,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
{
struct drr_class *cl = (struct drr_class *)arg;
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
}
static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
@@ -336,7 +342,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
int err = 0;
- bool first;
cl = drr_classify(skb, sch, &err);
if (cl == NULL) {
@@ -346,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return err;
}
- first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
@@ -356,7 +360,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return err;
}
- if (first) {
+ if (!cl_is_active(cl)) {
list_add_tail(&cl->alist, &q->active);
cl->deficit = cl->quantum;
}
@@ -390,7 +394,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
if (unlikely(skb == NULL))
goto out;
if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
bstats_update(&cl->bstats, skb);
qdisc_bstats_update(sch, skb);
@@ -431,7 +435,7 @@ static void drr_reset_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
if (cl->qdisc->q.qlen)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
qdisc_reset(cl->qdisc);
}
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 516038a44163..82635dd2cfa5 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -74,6 +74,11 @@ static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
};
+static bool cl_is_active(struct ets_class *cl)
+{
+ return !list_empty(&cl->alist);
+}
+
static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
unsigned int *quantum,
struct netlink_ext_ack *extack)
@@ -293,7 +298,7 @@ static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
* to remove them.
*/
if (!ets_class_is_strict(q, cl) && sch->q.qlen)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
}
static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
@@ -416,7 +421,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct ets_sched *q = qdisc_priv(sch);
struct ets_class *cl;
int err = 0;
- bool first;
cl = ets_classify(skb, sch, &err);
if (!cl) {
@@ -426,7 +430,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return err;
}
- first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
@@ -436,7 +439,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return err;
}
- if (first && !ets_class_is_strict(q, cl)) {
+ if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) {
list_add_tail(&cl->alist, &q->active);
cl->deficit = cl->quantum;
}
@@ -488,7 +491,7 @@ static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
if (unlikely(!skb))
goto out;
if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
return ets_qdisc_dequeue_skb(sch, skb);
}
@@ -648,6 +651,12 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
+ for (i = nbands; i < oldbands; i++) {
+ if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
+ list_del_init(&q->classes[i].alist);
+ qdisc_purge_queue(q->classes[i].qdisc);
+ }
+
WRITE_ONCE(q->nbands, nbands);
for (i = nstrict; i < q->nstrict; i++) {
if (q->classes[i].qdisc->q.qlen) {
@@ -655,11 +664,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
q->classes[i].deficit = quanta[i];
}
}
- for (i = q->nbands; i < oldbands; i++) {
- if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
- list_del(&q->classes[i].alist);
- qdisc_tree_flush_backlog(q->classes[i].qdisc);
- }
WRITE_ONCE(q->nstrict, nstrict);
memcpy(q->prio2band, priomap, sizeof(priomap));
@@ -713,7 +717,7 @@ static void ets_qdisc_reset(struct Qdisc *sch)
for (band = q->nstrict; band < q->nbands; band++) {
if (q->classes[band].qdisc->q.qlen)
- list_del(&q->classes[band].alist);
+ list_del_init(&q->classes[band].alist);
}
for (band = 0; band < q->nbands; band++)
qdisc_reset(q->classes[band].qdisc);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 2ca5332cfcc5..fee922da2f99 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -1013,11 +1013,11 @@ static int fq_load_priomap(struct fq_sched_data *q,
static int fq_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ unsigned int dropped_pkts = 0, dropped_bytes = 0;
struct fq_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_MAX + 1];
- int err, drop_count = 0;
- unsigned drop_len = 0;
u32 fq_log;
+ int err;
err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy,
NULL);
@@ -1135,16 +1135,18 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
err = fq_resize(sch, fq_log);
sch_tree_lock(sch);
}
+
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = fq_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
if (!skb)
break;
- drop_len += qdisc_pkt_len(skb);
+
+ dropped_pkts++;
+ dropped_bytes += qdisc_pkt_len(skb);
rtnl_kfree_skbs(skb, skb);
- drop_count++;
}
- qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
+ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
sch_tree_unlock(sch);
return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 6c9029f71e88..a14142392939 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -366,6 +366,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ unsigned int dropped_pkts = 0, dropped_bytes = 0;
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
u32 quantum = 0;
@@ -441,15 +442,16 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
while (sch->q.qlen > sch->limit ||
q->memory_usage > q->memory_limit) {
- struct sk_buff *skb = fq_codel_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
- q->cstats.drop_len += qdisc_pkt_len(skb);
+ if (!skb)
+ break;
+
+ dropped_pkts++;
+ dropped_bytes += qdisc_pkt_len(skb);
rtnl_kfree_skbs(skb, skb);
- q->cstats.drop_count++;
}
- qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
- q->cstats.drop_count = 0;
- q->cstats.drop_len = 0;
+ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 93c36afbf576..7b96bc3ff891 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -287,10 +287,9 @@ begin:
static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ unsigned int dropped_pkts = 0, dropped_bytes = 0;
struct fq_pie_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_PIE_MAX + 1];
- unsigned int len_dropped = 0;
- unsigned int num_dropped = 0;
int err;
err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack);
@@ -366,13 +365,16 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
/* Drop excess packets if new limit is lower */
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
- len_dropped += qdisc_pkt_len(skb);
- num_dropped += 1;
+ if (!skb)
+ break;
+
+ dropped_pkts++;
+ dropped_bytes += qdisc_pkt_len(skb);
rtnl_kfree_skbs(skb, skb);
}
- qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped);
+ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
sch_tree_unlock(sch);
return 0;
@@ -384,7 +386,7 @@ flow_error:
static void fq_pie_timer(struct timer_list *t)
{
- struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
+ struct fq_pie_sched_data *q = timer_container_of(q, t, adapt_timer);
unsigned long next, tupdate;
struct Qdisc *sch = q->sch;
spinlock_t *root_lock; /* to lock qdisc for probability calculations */
@@ -555,7 +557,7 @@ static void fq_pie_destroy(struct Qdisc *sch)
tcf_block_put(q->block);
q->p_params.tupdate = 0;
- del_timer_sync(&q->adapt_timer);
+ timer_delete_sync(&q->adapt_timer);
kvfree(q->flows);
}
diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
index ce63414185fd..d1d87dce7f3f 100644
--- a/net/sched/sch_frag.c
+++ b/net/sched/sch_frag.c
@@ -16,14 +16,18 @@ struct sch_frag_data {
unsigned int l2_len;
u8 l2_data[VLAN_ETH_HLEN];
int (*xmit)(struct sk_buff *skb);
+ local_lock_t bh_lock;
};
-static DEFINE_PER_CPU(struct sch_frag_data, sch_frag_data_storage);
+static DEFINE_PER_CPU(struct sch_frag_data, sch_frag_data_storage) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
static int sch_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct sch_frag_data *data = this_cpu_ptr(&sch_frag_data_storage);
+ lockdep_assert_held(&data->bh_lock);
if (skb_cow_head(skb, data->l2_len) < 0) {
kfree_skb(skb);
return -ENOMEM;
@@ -95,6 +99,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
struct rtable sch_frag_rt = { 0 };
unsigned long orig_dst;
+ local_lock_nested_bh(&sch_frag_data_storage.bh_lock);
sch_frag_prepare_frag(skb, xmit);
dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
@@ -105,11 +110,13 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
IPCB(skb)->frag_max_size = mru;
ret = ip_do_fragment(net, skb->sk, skb, sch_frag_xmit);
+ local_unlock_nested_bh(&sch_frag_data_storage.bh_lock);
refdst_drop(orig_dst);
} else if (skb_protocol(skb, true) == htons(ETH_P_IPV6)) {
unsigned long orig_dst;
struct rt6_info sch_frag_rt;
+ local_lock_nested_bh(&sch_frag_data_storage.bh_lock);
sch_frag_prepare_frag(skb, xmit);
memset(&sch_frag_rt, 0, sizeof(sch_frag_rt));
dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
@@ -122,6 +129,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
ret = ipv6_stub->ipv6_fragment(net, skb->sk, skb,
sch_frag_xmit);
+ local_unlock_nested_bh(&sch_frag_data_storage.bh_lock);
refdst_drop(orig_dst);
} else {
net_warn_ratelimited("Fail frag %s: eth=%x, MRU=%d, MTU=%d\n",
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 14ab2f4c190a..16afb834fe4a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -24,6 +24,7 @@
#include <linux/if_vlan.h>
#include <linux/skb_array.h>
#include <linux/if_macvlan.h>
+#include <linux/bpf.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
@@ -495,7 +496,7 @@ EXPORT_SYMBOL(netif_tx_unlock);
static void dev_watchdog(struct timer_list *t)
{
- struct net_device *dev = from_timer(dev, t, watchdog_timer);
+ struct net_device *dev = timer_container_of(dev, t, watchdog_timer);
bool release = true;
spin_lock(&dev->tx_global_lock);
@@ -567,7 +568,7 @@ EXPORT_SYMBOL_GPL(netdev_watchdog_up);
static void netdev_watchdog_down(struct net_device *dev)
{
netif_tx_lock_bh(dev);
- if (del_timer(&dev->watchdog_timer))
+ if (timer_delete(&dev->watchdog_timer))
netdev_put(dev, &dev->watchdog_dev_tracker);
netif_tx_unlock_bh(dev);
}
@@ -1001,14 +1002,14 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
{
struct Qdisc *sch;
- if (!try_module_get(ops->owner)) {
+ if (!bpf_try_module_get(ops, ops->owner)) {
NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
return NULL;
}
sch = qdisc_alloc(dev_queue, ops, extack);
if (IS_ERR(sch)) {
- module_put(ops->owner);
+ bpf_module_put(ops, ops->owner);
return NULL;
}
sch->parent = parentid;
@@ -1078,7 +1079,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
ops->destroy(qdisc);
lockdep_unregister_key(&qdisc->root_lock_key);
- module_put(ops->owner);
+ bpf_module_put(ops, ops->owner);
netdev_put(dev, &qdisc->dev_tracker);
trace_qdisc_destroy(qdisc);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c287bf8423b4..d8fd35da32a7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -175,6 +175,11 @@ struct hfsc_sched {
#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
+static bool cl_in_el_or_vttree(struct hfsc_class *cl)
+{
+ return ((cl->cl_flags & HFSC_FSC) && cl->cl_nactive) ||
+ ((cl->cl_flags & HFSC_RSC) && !RB_EMPTY_NODE(&cl->el_node));
+}
/*
* eligible tree holds backlogged classes being sorted by their eligible times.
@@ -203,7 +208,10 @@ eltree_insert(struct hfsc_class *cl)
static inline void
eltree_remove(struct hfsc_class *cl)
{
- rb_erase(&cl->el_node, &cl->sched->eligible);
+ if (!RB_EMPTY_NODE(&cl->el_node)) {
+ rb_erase(&cl->el_node, &cl->sched->eligible);
+ RB_CLEAR_NODE(&cl->el_node);
+ }
}
static inline void
@@ -827,22 +835,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
}
}
-static unsigned int
-qdisc_peek_len(struct Qdisc *sch)
-{
- struct sk_buff *skb;
- unsigned int len;
-
- skb = sch->ops->peek(sch);
- if (unlikely(skb == NULL)) {
- qdisc_warn_nonwc("qdisc_peek_len", sch);
- return 0;
- }
- len = qdisc_pkt_len(skb);
-
- return len;
-}
-
static void
hfsc_adjust_levels(struct hfsc_class *cl)
{
@@ -958,6 +950,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl != NULL) {
int old_flags;
+ int len = 0;
if (parentid) {
if (cl->cl_parent &&
@@ -988,9 +981,13 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (usc != NULL)
hfsc_change_usc(cl, usc, cur_time);
+ if (cl->qdisc->q.qlen != 0)
+ len = qdisc_peek_len(cl->qdisc);
+ /* Check queue length again since some qdisc implementations
+ * (e.g., netem/codel) might empty the queue during the peek
+ * operation.
+ */
if (cl->qdisc->q.qlen != 0) {
- int len = qdisc_peek_len(cl->qdisc);
-
if (cl->cl_flags & HFSC_RSC) {
if (old_flags & HFSC_RSC)
update_ed(cl, len);
@@ -1032,6 +1029,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ RB_CLEAR_NODE(&cl->el_node);
+
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
@@ -1220,7 +1219,8 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
/* vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
* needs to be called explicitly to remove a class from vttree.
*/
- update_vf(cl, 0, 0);
+ if (cl->cl_nactive)
+ update_vf(cl, 0, 0);
if (cl->cl_flags & HFSC_RSC)
eltree_remove(cl);
}
@@ -1560,7 +1560,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
return err;
}
- if (first) {
+ sch->qstats.backlog += len;
+ sch->q.qlen++;
+
+ if (first && !cl_in_el_or_vttree(cl)) {
if (cl->cl_flags & HFSC_RSC)
init_ed(cl, len);
if (cl->cl_flags & HFSC_FSC)
@@ -1575,9 +1578,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
- sch->qstats.backlog += len;
- sch->q.qlen++;
-
return NET_XMIT_SUCCESS;
}
@@ -1632,10 +1632,16 @@ hfsc_dequeue(struct Qdisc *sch)
if (cl->qdisc->q.qlen != 0) {
/* update ed */
next_len = qdisc_peek_len(cl->qdisc);
- if (realtime)
- update_ed(cl, next_len);
- else
- update_d(cl, next_len);
+ /* Check queue length again since some qdisc implementations
+ * (e.g., netem/codel) might empty the queue during the peek
+ * operation.
+ */
+ if (cl->qdisc->q.qlen != 0) {
+ if (realtime)
+ update_ed(cl, next_len);
+ else
+ update_d(cl, next_len);
+ }
} else {
/* the class becomes passive */
eltree_remove(cl);
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 44d9efe1a96a..2d4855e28a28 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -508,9 +508,9 @@ static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ unsigned int dropped_pkts = 0, dropped_bytes = 0;
struct hhf_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_HHF_MAX + 1];
- unsigned int qlen, prev_backlog;
int err;
u64 non_hh_quantum;
u32 new_quantum = q->quantum;
@@ -561,15 +561,17 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
usecs_to_jiffies(us));
}
- qlen = sch->q.qlen;
- prev_backlog = sch->qstats.backlog;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = hhf_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
+
+ if (!skb)
+ break;
+ dropped_pkts++;
+ dropped_bytes += qdisc_pkt_len(skb);
rtnl_kfree_skbs(skb, skb);
}
- qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
- prev_backlog - sch->qstats.backlog);
+ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c31bc5489bdd..b5e40c51655a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
*/
static inline void htb_next_rb_node(struct rb_node **n)
{
- *n = rb_next(*n);
+ if (*n)
+ *n = rb_next(*n);
}
/**
@@ -591,7 +592,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
*/
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
- WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
+ WARN_ON(cl->level || !cl->leaf.q);
if (!cl->prio_activity) {
cl->prio_activity = 1 << cl->prio;
@@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
*/
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
- WARN_ON(!cl->prio_activity);
-
+ if (!cl->prio_activity)
+ return;
htb_deactivate_prios(q, cl);
cl->prio_activity = 0;
}
@@ -820,7 +821,9 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
u32 *pid;
} stk[TC_HTB_MAXDEPTH], *sp = stk;
- BUG_ON(!hprio->row.rb_node);
+ if (unlikely(!hprio->row.rb_node))
+ return NULL;
+
sp->root = hprio->row.rb_node;
sp->pptr = &hprio->ptr;
sp->pid = &hprio->last_ptr_id;
@@ -1738,8 +1741,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
if (cl->parent)
cl->parent->children--;
- if (cl->prio_activity)
- htb_deactivate(q, cl);
+ htb_deactivate(q, cl);
if (cl->cmode != HTB_CAN_SEND)
htb_safe_rb_erase(&cl->pq_node,
@@ -1947,8 +1949,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* turn parent into inner node */
qdisc_purge_queue(parent->leaf.q);
parent_qdisc = parent->leaf.q;
- if (parent->prio_activity)
- htb_deactivate(q, parent);
+ htb_deactivate(q, parent);
/* remove from evt list because of level change */
if (parent->cmode != HTB_CAN_SEND) {
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 51d4013b6121..f3e5ef9a9592 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -152,7 +152,7 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
static const struct
nla_policy mqprio_tc_entry_policy[TCA_MQPRIO_TC_ENTRY_MAX + 1] = {
[TCA_MQPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32,
- TC_QOPT_MAX_QUEUE),
+ TC_QOPT_MAX_QUEUE - 1),
[TCA_MQPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
TC_FP_EXPRESS,
TC_FP_PREEMPTIBLE),
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index fdd79d3ccd8c..eafc316ae319 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -973,6 +973,41 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
return 0;
}
+static const struct Qdisc_class_ops netem_class_ops;
+
+static int check_netem_in_tree(struct Qdisc *sch, bool duplicates,
+ struct netlink_ext_ack *extack)
+{
+ struct Qdisc *root, *q;
+ unsigned int i;
+
+ root = qdisc_root_sleeping(sch);
+
+ if (sch != root && root->ops->cl_ops == &netem_class_ops) {
+ if (duplicates ||
+ ((struct netem_sched_data *)qdisc_priv(root))->duplicate)
+ goto err;
+ }
+
+ if (!qdisc_dev(root))
+ return 0;
+
+ hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) {
+ if (sch != q && q->ops->cl_ops == &netem_class_ops) {
+ if (duplicates ||
+ ((struct netem_sched_data *)qdisc_priv(q))->duplicate)
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ NL_SET_ERR_MSG(extack,
+ "netem: cannot mix duplicating netems with other netems in tree");
+ return -EINVAL;
+}
+
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
@@ -1031,6 +1066,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
q->gap = qopt->gap;
q->counter = 0;
q->loss = qopt->loss;
+
+ ret = check_netem_in_tree(sch, qopt->duplicate, extack);
+ if (ret)
+ goto unlock;
+
q->duplicate = qopt->duplicate;
/* for compatibility with earlier versions.
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index bb1fa9aa530b..0a377313b6a9 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -141,9 +141,9 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
static int pie_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ unsigned int dropped_pkts = 0, dropped_bytes = 0;
struct pie_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_PIE_MAX + 1];
- unsigned int qlen, dropped = 0;
int err;
err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy,
@@ -193,15 +193,17 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]));
/* Drop excess packets if new limit is lower */
- qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
- dropped += qdisc_pkt_len(skb);
- qdisc_qstats_backlog_dec(sch, skb);
+ if (!skb)
+ break;
+
+ dropped_pkts++;
+ dropped_bytes += qdisc_pkt_len(skb);
rtnl_qdisc_drop(skb, sch);
}
- qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
+ qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
sch_tree_unlock(sch);
return 0;
@@ -424,7 +426,7 @@ EXPORT_SYMBOL_GPL(pie_calculate_probability);
static void pie_timer(struct timer_list *t)
{
- struct pie_sched_data *q = from_timer(q, t, adapt_timer);
+ struct pie_sched_data *q = timer_container_of(q, t, adapt_timer);
struct Qdisc *sch = q->sch;
spinlock_t *root_lock;
@@ -545,7 +547,7 @@ static void pie_destroy(struct Qdisc *sch)
struct pie_sched_data *q = qdisc_priv(sch);
q->params.tupdate = 0;
- del_timer_sync(&q->adapt_timer);
+ timer_delete_sync(&q->adapt_timer);
}
static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index cc30f7a32f1a..9e2b9a490db2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
for (i = q->bands; i < oldbands; i++)
- qdisc_tree_flush_backlog(q->queues[i]);
+ qdisc_purge_queue(q->queues[i]);
for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6a07cdbdb9e1..2255355e51d3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -202,6 +202,11 @@ struct qfq_sched {
*/
enum update_reason {enqueue, requeue};
+static bool cl_is_active(struct qfq_class *cl)
+{
+ return !list_empty(&cl->alist);
+}
+
static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
{
struct qfq_sched *q = qdisc_priv(sch);
@@ -347,7 +352,7 @@ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
struct qfq_aggregate *agg = cl->agg;
- list_del(&cl->alist); /* remove from RR queue of the aggregate */
+ list_del_init(&cl->alist); /* remove from RR queue of the aggregate */
if (list_empty(&agg->active)) /* agg is now inactive */
qfq_deactivate_agg(q, agg);
}
@@ -407,7 +412,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
bool existing = false;
struct nlattr *tb[TCA_QFQ_MAX + 1];
struct qfq_aggregate *new_agg = NULL;
- u32 weight, lmax, inv_w;
+ u32 weight, lmax, inv_w, old_weight, old_lmax;
int err;
int delta_w;
@@ -438,16 +443,20 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
- if (cl != NULL &&
- lmax == cl->agg->lmax &&
- weight == cl->agg->class_weight)
- return 0; /* nothing to change */
+ if (cl != NULL) {
+ sch_tree_lock(sch);
+ old_weight = cl->agg->class_weight;
+ old_lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
+ if (lmax == old_lmax && weight == old_weight)
+ return 0; /* nothing to change */
+ }
- delta_w = weight - (cl ? cl->agg->class_weight : 0);
+ delta_w = weight - (cl ? old_weight : 0);
if (q->wsum + delta_w > QFQ_MAX_WSUM) {
NL_SET_ERR_MSG_FMT_MOD(extack,
- "total weight out of range (%d + %u)\n",
+ "total weight out of range (%d + %u)",
delta_w, q->wsum);
return -EINVAL;
}
@@ -474,6 +483,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
gnet_stats_basic_sync_init(&cl->bstats);
cl->common.classid = classid;
cl->deficit = lmax;
+ INIT_LIST_HEAD(&cl->alist);
cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
classid, NULL);
@@ -526,9 +536,6 @@ destroy_class:
static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
{
- struct qfq_sched *q = qdisc_priv(sch);
-
- qfq_rm_from_agg(q, cl);
gen_kill_estimator(&cl->rate_est);
qdisc_put(cl->qdisc);
kfree(cl);
@@ -549,6 +556,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->common);
+ qfq_rm_from_agg(q, cl);
sch_tree_unlock(sch);
@@ -619,6 +627,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
{
struct qfq_class *cl = (struct qfq_class *)arg;
struct nlattr *nest;
+ u32 class_weight, lmax;
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
@@ -627,8 +636,13 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
- nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
+
+ sch_tree_lock(sch);
+ class_weight = cl->agg->class_weight;
+ lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
+ if (nla_put_u32(skb, TCA_QFQ_WEIGHT, class_weight) ||
+ nla_put_u32(skb, TCA_QFQ_LMAX, lmax))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -645,8 +659,10 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
memset(&xstats, 0, sizeof(xstats));
+ sch_tree_lock(sch);
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
@@ -982,8 +998,8 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
cl->deficit -= (int) len;
if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
- list_del(&cl->alist);
- else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
+ list_del_init(&cl->alist);
+ else if (cl->deficit < qdisc_peek_len(cl->qdisc)) {
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
}
@@ -1214,7 +1230,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct qfq_class *cl;
struct qfq_aggregate *agg;
int err = 0;
- bool first;
cl = qfq_classify(skb, sch, &err);
if (cl == NULL) {
@@ -1236,7 +1251,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
- first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
pr_debug("qfq_enqueue: enqueue failed %d\n", err);
@@ -1252,8 +1266,8 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
++sch->q.qlen;
agg = cl->agg;
- /* if the queue was not empty, then done here */
- if (!first) {
+ /* if the class is active, then done here */
+ if (cl_is_active(cl)) {
if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
list_first_entry(&agg->active, struct qfq_class, alist)
== cl && cl->deficit < len)
@@ -1415,6 +1429,8 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
+ if (list_empty(&cl->alist))
+ return;
qfq_deactivate_class(q, cl);
}
@@ -1485,6 +1501,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
common.hnode) {
+ qfq_rm_from_agg(q, cl);
qfq_destroy_class(sch, cl);
}
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index ef8a2afed26b..479c42d11083 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -218,7 +218,7 @@ static void red_destroy(struct Qdisc *sch)
tcf_qevent_destroy(&q->qe_mark, sch);
tcf_qevent_destroy(&q->qe_early_drop, sch);
- del_timer_sync(&q->adapt_timer);
+ timer_delete_sync(&q->adapt_timer);
red_offload(sch, false);
qdisc_put(q->qdisc);
}
@@ -285,7 +285,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
q->userbits = userbits;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old_child = q->qdisc;
q->qdisc = child;
}
@@ -297,7 +297,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
max_P);
red_set_vars(&q->vars);
- del_timer(&q->adapt_timer);
+ timer_delete(&q->adapt_timer);
if (ctl->flags & TC_RED_ADAPTATIVE)
mod_timer(&q->adapt_timer, jiffies + HZ/2);
@@ -321,7 +321,7 @@ unlock_out:
static inline void red_adaptative_timer(struct timer_list *t)
{
- struct red_sched_data *q = from_timer(q, t, adapt_timer);
+ struct red_sched_data *q = timer_container_of(q, t, adapt_timer);
struct Qdisc *sch = q->sch;
spinlock_t *root_lock;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 58b42dcf8f20..96eb2f122973 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -310,7 +310,10 @@ drop:
/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
x = q->tail->next;
slot = &q->slots[x];
- q->tail->next = slot->next;
+ if (slot->next == x)
+ q->tail = NULL; /* no more active slots */
+ else
+ q->tail->next = slot->next;
q->ht[slot->hash] = SFQ_EMPTY_SLOT;
goto drop;
}
@@ -597,7 +600,7 @@ drop:
static void sfq_perturbation(struct timer_list *t)
{
- struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
+ struct sfq_sched_data *q = timer_container_of(q, t, perturb_timer);
struct Qdisc *sch = q->sch;
spinlock_t *root_lock;
siphash_key_t nkey;
@@ -653,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
return -EINVAL;
}
+
+ if (ctl->perturb_period < 0 ||
+ ctl->perturb_period > INT_MAX / HZ) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period");
+ return -EINVAL;
+ }
+ perturb_period = ctl->perturb_period * HZ;
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
@@ -669,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
headdrop = q->headdrop;
maxdepth = q->maxdepth;
maxflows = q->maxflows;
- perturb_period = q->perturb_period;
quantum = q->quantum;
flags = q->flags;
/* update and validate configuration */
if (ctl->quantum)
quantum = ctl->quantum;
- perturb_period = ctl->perturb_period * HZ;
if (ctl->flows)
maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
@@ -730,7 +739,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
rtnl_kfree_skbs(to_free, tail);
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
- del_timer(&q->perturb_timer);
+ timer_delete(&q->perturb_timer);
if (q->perturb_period) {
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
get_random_bytes(&q->perturbation, sizeof(q->perturbation));
@@ -756,7 +765,7 @@ static void sfq_destroy(struct Qdisc *sch)
tcf_block_put(q->block);
WRITE_ONCE(q->perturb_period, 0);
- del_timer_sync(&q->perturb_timer);
+ timer_delete_sync(&q->perturb_timer);
sfq_free(q->ht);
sfq_free(q->slots);
kfree(q->red_parms);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index a68e17891b0b..85d84f39e220 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -43,6 +43,11 @@ static struct static_key_false taprio_have_working_mqprio;
#define TAPRIO_SUPPORTED_FLAGS \
(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
#define TAPRIO_FLAGS_INVALID U32_MAX
+/* Minimum value for picos_per_byte to ensure non-zero duration
+ * for minimum-sized Ethernet frames (ETH_ZLEN = 60).
+ * 60 * 17 > PSEC_PER_NSEC (1000)
+ */
+#define TAPRIO_PICOS_PER_BYTE_MIN 17
struct sched_entry {
/* Durations between this GCL entry and the GCL entry where the
@@ -1284,7 +1289,8 @@ static void taprio_start_sched(struct Qdisc *sch,
}
static void taprio_set_picos_per_byte(struct net_device *dev,
- struct taprio_sched *q)
+ struct taprio_sched *q,
+ struct netlink_ext_ack *extack)
{
struct ethtool_link_ksettings ecmd;
int speed = SPEED_10;
@@ -1300,6 +1306,15 @@ static void taprio_set_picos_per_byte(struct net_device *dev,
skip:
picos_per_byte = (USEC_PER_SEC * 8) / speed;
+ if (picos_per_byte < TAPRIO_PICOS_PER_BYTE_MIN) {
+ if (!extack)
+ pr_warn("Link speed %d is too high. Schedule may be inaccurate.\n",
+ speed);
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Link speed %d is too high. Schedule may be inaccurate.",
+ speed);
+ picos_per_byte = TAPRIO_PICOS_PER_BYTE_MIN;
+ }
atomic64_set(&q->picos_per_byte, picos_per_byte);
netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
@@ -1324,17 +1339,19 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
if (dev != qdisc_dev(q->root))
continue;
- taprio_set_picos_per_byte(dev, q);
+ taprio_set_picos_per_byte(dev, q, NULL);
stab = rtnl_dereference(q->root->stab);
- oper = rtnl_dereference(q->oper_sched);
+ rcu_read_lock();
+ oper = rcu_dereference(q->oper_sched);
if (oper)
taprio_update_queue_max_sdu(q, oper, stab);
- admin = rtnl_dereference(q->admin_sched);
+ admin = rcu_dereference(q->admin_sched);
if (admin)
taprio_update_queue_max_sdu(q, admin, stab);
+ rcu_read_unlock();
break;
}
@@ -1846,7 +1863,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
q->flags = taprio_flags;
/* Needed for length_to_duration() during netlink attribute parsing */
- taprio_set_picos_per_byte(dev, q);
+ taprio_set_picos_per_byte(dev, q, extack);
err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
@@ -1932,8 +1949,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
!hrtimer_active(&q->advance_timer)) {
- hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
- q->advance_timer.function = advance_sched;
+ hrtimer_setup(&q->advance_timer, advance_sched, q->clockid, HRTIMER_MODE_ABS);
}
err = taprio_get_start_time(sch, new_admin, &start);
@@ -2056,8 +2072,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
spin_lock_init(&q->current_entry_lock);
- hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
- q->advance_timer.function = advance_sched;
+ hrtimer_setup(&q->advance_timer, advance_sched, CLOCK_TAI, HRTIMER_MODE_ABS);
q->root = sch;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index dc26b22d53c7..4c977f049670 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old = q->qdisc;
q->qdisc = child;
}