Diffstat (limited to 'net/sched')
-rw-r--r--  net/sched/act_api.c     | 106
-rw-r--r--  net/sched/act_ct.c      |   2
-rw-r--r--  net/sched/cls_api.c     |  12
-rw-r--r--  net/sched/cls_flower.c  |  71
-rw-r--r--  net/sched/cls_tcindex.c |   8
-rw-r--r--  net/sched/em_nbyte.c    |   2
-rw-r--r--  net/sched/sch_api.c     |  10
-rw-r--r--  net/sched/sch_atm.c     |   3
-rw-r--r--  net/sched/sch_cbq.c     |   3
-rw-r--r--  net/sched/sch_drr.c     |   3
-rw-r--r--  net/sched/sch_dsmark.c  |   3
-rw-r--r--  net/sched/sch_hfsc.c    |   3
-rw-r--r--  net/sched/sch_htb.c     | 557
-rw-r--r--  net/sched/sch_qfq.c     |   3
-rw-r--r--  net/sched/sch_sfb.c     |   3
-rw-r--r--  net/sched/sch_taprio.c  |   6
16 files changed, 694 insertions(+), 101 deletions(-)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2e85b636b27b..b919826939e0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -908,7 +908,7 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
 	[TCA_ACT_HW_STATS]	= NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
 };
 
-static void tcf_idr_insert_many(struct tc_action *actions[])
+void tcf_idr_insert_many(struct tc_action *actions[])
 {
 	int i;
 
@@ -928,19 +928,13 @@ static void tcf_idr_insert_many(struct tc_action *actions[])
 	}
 }
 
-struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
-				    struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind,
-				    bool rtnl_held,
-				    struct netlink_ext_ack *extack)
+struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+					 bool rtnl_held,
+					 struct netlink_ext_ack *extack)
 {
-	struct nla_bitfield32 flags = { 0, 0 };
-	u8 hw_stats = TCA_ACT_HW_STATS_ANY;
-	struct tc_action *a;
+	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct tc_action_ops *a_o;
-	struct tc_cookie *cookie = NULL;
 	char act_name[IFNAMSIZ];
-	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct nlattr *kind;
 	int err;
 
@@ -948,33 +942,21 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 		err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
 						  tcf_action_policy, extack);
 		if (err < 0)
-			goto err_out;
+			return ERR_PTR(err);
 		err = -EINVAL;
 		kind = tb[TCA_ACT_KIND];
 		if (!kind) {
 			NL_SET_ERR_MSG(extack, "TC action kind must be specified");
-			goto err_out;
+			return ERR_PTR(err);
 		}
 		if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) {
 			NL_SET_ERR_MSG(extack, "TC action name too long");
-			goto err_out;
-		}
-		if (tb[TCA_ACT_COOKIE]) {
-			cookie = nla_memdup_cookie(tb);
-			if (!cookie) {
-				NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
-				err = -ENOMEM;
-				goto err_out;
-			}
+			return ERR_PTR(err);
 		}
-		hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
-		if (tb[TCA_ACT_FLAGS])
-			flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
 	} else {
 		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
 			NL_SET_ERR_MSG(extack, "TC action name too long");
-			err = -EINVAL;
-			goto err_out;
+			return ERR_PTR(-EINVAL);
 		}
 	}
 
@@ -996,24 +978,56 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 		 * indicate this using -EAGAIN.
 		 */
 		if (a_o != NULL) {
-			err = -EAGAIN;
-			goto err_mod;
+			module_put(a_o->owner);
+			return ERR_PTR(-EAGAIN);
 		}
 #endif
 		NL_SET_ERR_MSG(extack, "Failed to load TC action module");
-		err = -ENOENT;
-		goto err_free;
+		return ERR_PTR(-ENOENT);
 	}
 
+	return a_o;
+}
+
+struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
+				    struct nlattr *nla, struct nlattr *est,
+				    char *name, int ovr, int bind,
+				    struct tc_action_ops *a_o, bool rtnl_held,
+				    struct netlink_ext_ack *extack)
+{
+	struct nla_bitfield32 flags = { 0, 0 };
+	u8 hw_stats = TCA_ACT_HW_STATS_ANY;
+	struct nlattr *tb[TCA_ACT_MAX + 1];
+	struct tc_cookie *cookie = NULL;
+	struct tc_action *a;
+	int err;
+
 	/* backward compatibility for policer */
-	if (name == NULL)
+	if (name == NULL) {
+		err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+						  tcf_action_policy, extack);
+		if (err < 0)
+			return ERR_PTR(err);
+		if (tb[TCA_ACT_COOKIE]) {
+			cookie = nla_memdup_cookie(tb);
+			if (!cookie) {
+				NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
+				err = -ENOMEM;
+				goto err_out;
+			}
+		}
+		hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
+		if (tb[TCA_ACT_FLAGS])
+			flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
+
 		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
 				rtnl_held, tp, flags.value, extack);
-	else
+	} else {
 		err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
 				tp, flags.value, extack);
+	}
 	if (err < 0)
-		goto err_mod;
+		goto err_out;
 
 	if (!name && tb[TCA_ACT_COOKIE])
 		tcf_set_action_cookie(&a->act_cookie, cookie);
@@ -1030,14 +1044,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 
 	return a;
 
-err_mod:
-	module_put(a_o->owner);
-err_free:
+err_out:
 	if (cookie) {
 		kfree(cookie->data);
 		kfree(cookie);
 	}
-err_out:
 	return ERR_PTR(err);
 }
 
@@ -1048,6 +1059,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct tc_action *actions[], size_t *attr_size,
 		    bool rtnl_held, struct netlink_ext_ack *extack)
 {
+	struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
 	size_t sz = 0;
@@ -1060,8 +1072,19 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		return err;
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
+		struct tc_action_ops *a_o;
+
+		a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
+		if (IS_ERR(a_o)) {
+			err = PTR_ERR(a_o);
+			goto err_mod;
+		}
+		ops[i - 1] = a_o;
+	}
+
+	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
 		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-					rtnl_held, extack);
+					ops[i - 1], rtnl_held, extack);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
@@ -1081,6 +1104,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 
 err:
 	tcf_action_destroy(actions, bind);
+err_mod:
+	for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
+		if (ops[i])
+			module_put(ops[i]->owner);
+	}
 	return err;
 }
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 83a5c6722a06..f0a0aa125b00 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -183,6 +183,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
 					IP_CT_ESTABLISHED_REPLY;
 	/* aligns with the CT reference on the SKB nf_ct_set */
 	entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
+	entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
 
 	act_ct_labels = entry->ct_metadata.labels;
 	ct_labels = nf_ct_labels_find(ct);
@@ -1030,6 +1031,7 @@ out_push:
 
 out:
 	tcf_action_update_bstats(&c->common, skb);
+	qdisc_skb_cb(skb)->post_ct = true;
 	if (defrag)
 		qdisc_skb_cb(skb)->pkt_len = skb->len;
 	return retval;
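A note on the act_api.c split above: tc_action_load_ops() now pins every action's module before any init callback runs, so tcf_action_init()'s err_mod path can release whatever was pinned without tracking how far initialization got, and callers that open-code a single action (see cls_api.c below) pair the load with an explicit module_put() on failure. A minimal self-contained model of the acquire-all-then-init shape, written as plain userspace C with hypothetical names rather than the kernel API:

#include <stdio.h>

#define MAX_PRIO 32

struct ops { const char *name; int refs; };

/* Stand-ins for tc_action_load_ops() / module_put(). */
static struct ops *load_ops(struct ops *o) { o->refs++; return o; }
static void put_ops(struct ops *o)         { o->refs--; }

static int init_actions(struct ops *tab[], int n)
{
	struct ops *held[MAX_PRIO] = { NULL };
	int i, err = 0;

	/* Pass 1: pin every module before touching any action. */
	for (i = 0; i < n; i++)
		held[i] = load_ops(tab[i]);

	/* Pass 2: initialize; cleanup no longer needs to know
	 * which initialization step failed.
	 */
	for (i = 0; i < n && !err; i++)
		printf("init %s\n", held[i]->name);

	/* Mirrors the new err_mod loop: drop every temporary pin. */
	for (i = 0; i < n; i++)
		if (held[i])
			put_ops(held[i]);
	return err;
}

int main(void)
{
	struct ops police = { "police", 0 }, mirred = { "mirred", 0 };
	struct ops *tab[] = { &police, &mirred };

	return init_actions(tab, 2);
}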
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 37b77bd30974..e37556cc37ab 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3043,16 +3043,24 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		size_t attr_size = 0;
 
 		if (exts->police && tb[exts->police]) {
+			struct tc_action_ops *a_o;
+
+			a_o = tc_action_load_ops("police", tb[exts->police],
+						 rtnl_held, extack);
+			if (IS_ERR(a_o))
+				return PTR_ERR(a_o);
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND, rtnl_held,
+						TCA_ACT_BIND, a_o, rtnl_held,
 						extack);
-			if (IS_ERR(act))
+			if (IS_ERR(act)) {
+				module_put(a_o->owner);
 				return PTR_ERR(act);
+			}
 
 			act->type = exts->type = TCA_OLD_COMPAT;
 			exts->actions[0] = act;
 			exts->nr_actions = 1;
+			tcf_idr_insert_many(exts->actions);
 		} else if (exts->action && tb[exts->action]) {
 			int err;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1319986693fc..2409e522c68f 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -30,6 +30,11 @@
 
 #include <uapi/linux/netfilter/nf_conntrack_common.h>
 
+#define TCA_FLOWER_KEY_CT_FLAGS_MAX \
+		((__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) << 1)
+#define TCA_FLOWER_KEY_CT_FLAGS_MASK \
+		(TCA_FLOWER_KEY_CT_FLAGS_MAX - 1)
+
 struct fl_flow_key {
 	struct flow_dissector_key_meta meta;
 	struct flow_dissector_key_control control;
@@ -291,9 +296,11 @@ static u16 fl_ct_info_to_flower_map[] = {
 	[IP_CT_RELATED] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
 					TCA_FLOWER_KEY_CT_FLAGS_RELATED,
 	[IP_CT_ESTABLISHED_REPLY] =	TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
-					TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED,
+					TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
+					TCA_FLOWER_KEY_CT_FLAGS_REPLY,
 	[IP_CT_RELATED_REPLY] =		TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
-					TCA_FLOWER_KEY_CT_FLAGS_RELATED,
+					TCA_FLOWER_KEY_CT_FLAGS_RELATED |
+					TCA_FLOWER_KEY_CT_FLAGS_REPLY,
 	[IP_CT_NEW] =			TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
 					TCA_FLOWER_KEY_CT_FLAGS_NEW,
 };
@@ -302,6 +309,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		       struct tcf_result *res)
 {
 	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+	bool post_ct = qdisc_skb_cb(skb)->post_ct;
 	struct fl_flow_key skb_key;
 	struct fl_flow_mask *mask;
 	struct cls_fl_filter *f;
@@ -318,7 +326,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
 		skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
 				    fl_ct_info_to_flower_map,
-				    ARRAY_SIZE(fl_ct_info_to_flower_map));
+				    ARRAY_SIZE(fl_ct_info_to_flower_map),
+				    post_ct);
 		skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
 		skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
 
@@ -686,8 +695,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_ENC_OPTS]	= { .type = NLA_NESTED },
 	[TCA_FLOWER_KEY_ENC_OPTS_MASK]	= { .type = NLA_NESTED },
-	[TCA_FLOWER_KEY_CT_STATE]	= { .type = NLA_U16 },
-	[TCA_FLOWER_KEY_CT_STATE_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CT_STATE]	=
+		NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK),
+	[TCA_FLOWER_KEY_CT_STATE_MASK]	=
+		NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK),
 	[TCA_FLOWER_KEY_CT_ZONE]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_CT_ZONE_MASK]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_CT_MARK]	= { .type = NLA_U32 },
@@ -1272,6 +1283,10 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 		nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
 		msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+		if (!nla_ok(nla_opt_msk, msk_depth)) {
+			NL_SET_ERR_MSG(extack, "Invalid nested attribute for masks");
+			return -EINVAL;
+		}
 	}
 
 	nla_for_each_attr(nla_opt_key, nla_enc_key,
@@ -1307,9 +1322,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
 				return -EINVAL;
 			}
-
-			if (msk_depth)
-				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
 			break;
 		case TCA_FLOWER_KEY_ENC_OPTS_VXLAN:
 			if (key->enc_opts.dst_opt_type) {
@@ -1340,9 +1352,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
 				return -EINVAL;
 			}
-
-			if (msk_depth)
-				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
 			break;
 		case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN:
 			if (key->enc_opts.dst_opt_type) {
@@ -1373,14 +1382,39 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
 				return -EINVAL;
 			}
-
-			if (msk_depth)
-				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
 			break;
 		default:
 			NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
 			return -EINVAL;
 		}
+
+		if (!msk_depth)
+			continue;
+
+		if (!nla_ok(nla_opt_msk, msk_depth)) {
+			NL_SET_ERR_MSG(extack, "A mask attribute is invalid");
+			return -EINVAL;
+		}
+		nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+	}
+
+	return 0;
+}
+
+static int fl_validate_ct_state(u16 state, struct nlattr *tb,
+				struct netlink_ext_ack *extack)
+{
+	if (state && !(state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) {
+		NL_SET_ERR_MSG_ATTR(extack, tb,
+				    "no trk, so no other flag can be set");
+		return -EINVAL;
+	}
+
+	if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW &&
+	    state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) {
+		NL_SET_ERR_MSG_ATTR(extack, tb,
+				    "new and est are mutually exclusive");
+		return -EINVAL;
 	}
 
 	return 0;
@@ -1392,6 +1426,8 @@ static int fl_set_key_ct(struct nlattr **tb,
 			 struct netlink_ext_ack *extack)
 {
 	if (tb[TCA_FLOWER_KEY_CT_STATE]) {
+		int err;
+
 		if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) {
 			NL_SET_ERR_MSG(extack, "Conntrack isn't enabled");
 			return -EOPNOTSUPP;
@@ -1399,6 +1435,13 @@ static int fl_set_key_ct(struct nlattr **tb,
 		fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE,
 			       &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
 			       sizeof(key->ct_state));
+
+		err = fl_validate_ct_state(mask->ct_state,
+					   tb[TCA_FLOWER_KEY_CT_STATE_MASK],
+					   extack);
+		if (err)
+			return err;
+
 	}
 	if (tb[TCA_FLOWER_KEY_CT_ZONE]) {
 		if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 78bec347b8b6..c4007b9cd16d 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -366,9 +366,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	if (tb[TCA_TCINDEX_MASK])
 		cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
 
-	if (tb[TCA_TCINDEX_SHIFT])
+	if (tb[TCA_TCINDEX_SHIFT]) {
 		cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-
+		if (cp->shift > 16) {
+			err = -EINVAL;
+			goto errout;
+		}
+	}
 	if (!cp->hash) {
 		/* Hash not specified, use perfect hash if the upper limit
 		 * of the hashing index is below the threshold.
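Two layers now guard flower's ct_state in the hunks above: the netlink policy rejects undefined bits outright, and fl_validate_ct_state() rejects defined-but-contradictory combinations (any flag without +trk, and +new together with +est). The policy bound is derived from the last flag in the uapi enum instead of being hand-maintained. A standalone sketch of both checks, with flag values mirrored from include/uapi/linux/pkt_cls.h as of this diff (worth re-checking against the tree you build against):

#include <assert.h>
#include <stdio.h>

/* Mirrored from include/uapi/linux/pkt_cls.h at the time of this diff. */
enum {
	TCA_FLOWER_KEY_CT_FLAGS_NEW		= 1 << 0,
	TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED	= 1 << 1,
	TCA_FLOWER_KEY_CT_FLAGS_RELATED		= 1 << 2,
	TCA_FLOWER_KEY_CT_FLAGS_TRACKED		= 1 << 3,
	TCA_FLOWER_KEY_CT_FLAGS_INVALID		= 1 << 4,
	TCA_FLOWER_KEY_CT_FLAGS_REPLY		= 1 << 5,
	__TCA_FLOWER_KEY_CT_FLAGS_MAX,
};

#define TCA_FLOWER_KEY_CT_FLAGS_MAX \
		((__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) << 1)
#define TCA_FLOWER_KEY_CT_FLAGS_MASK \
		(TCA_FLOWER_KEY_CT_FLAGS_MAX - 1)

/* Same two rules as fl_validate_ct_state(), minus the netlink plumbing. */
static int validate_ct_state(unsigned short state)
{
	if (state && !(state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED))
		return -1;	/* no trk, so no other flag can be set */
	if ((state & TCA_FLOWER_KEY_CT_FLAGS_NEW) &&
	    (state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED))
		return -1;	/* new and est are mutually exclusive */
	return 0;
}

int main(void)
{
	/* REPLY (1 << 5) is the last flag, so the policy mask works out
	 * to 0x3f: exactly the six defined bits, nothing above them.
	 */
	assert(TCA_FLOWER_KEY_CT_FLAGS_MASK == 0x3f);

	assert(validate_ct_state(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
				 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
				 TCA_FLOWER_KEY_CT_FLAGS_REPLY) == 0);
	assert(validate_ct_state(TCA_FLOWER_KEY_CT_FLAGS_NEW) == -1);
	assert(validate_ct_state(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
				 TCA_FLOWER_KEY_CT_FLAGS_NEW |
				 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) == -1);

	puts("ct_state checks behave as in the patch");
	return 0;
}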
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 2c1192a2ee5e..a83b237cbeb0 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -31,7 +31,7 @@ static int em_nbyte_change(struct net *net, void *data, int data_len,
 	em->datalen = sizeof(*nbyte) + nbyte->len;
 	em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL);
 	if (em->data == 0UL)
-		return -ENOBUFS;
+		return -ENOMEM;
 
 	return 0;
 }
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 51cb553e4317..e2e4353db8a7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -412,7 +412,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 {
 	struct qdisc_rate_table *rtab;
 
-	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
+	if (tab == NULL || r->rate == 0 ||
+	    r->cell_log == 0 || r->cell_log >= 32 ||
 	    nla_len(tab) != TC_RTAB_SIZE) {
 		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
 		return NULL;
@@ -1865,7 +1866,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 static int tclass_del_notify(struct net *net,
 			     const struct Qdisc_class_ops *cops,
 			     struct sk_buff *oskb, struct nlmsghdr *n,
-			     struct Qdisc *q, unsigned long cl)
+			     struct Qdisc *q, unsigned long cl,
+			     struct netlink_ext_ack *extack)
 {
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 	struct sk_buff *skb;
@@ -1884,7 +1886,7 @@ static int tclass_del_notify(struct net *net,
 		return -EINVAL;
 	}
 
-	err = cops->delete(q, cl);
+	err = cops->delete(q, cl, extack);
 	if (err) {
 		kfree_skb(skb);
 		return err;
@@ -2087,7 +2089,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 			goto out;
 		break;
 	case RTM_DELTCLASS:
-		err = tclass_del_notify(net, cops, skb, n, q, cl);
+		err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
 		/* Unbind the class with flilters with 0
 		 */
 		tc_bind_tclass(q, portid, clid, 0);
		goto out;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 007bd2d9f1ff..d0c9a57398fc 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -320,7 +320,8 @@ err_out:
 	return error;
 }
 
-static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
+static int atm_tc_delete(struct Qdisc *sch, unsigned long arg,
+			 struct netlink_ext_ack *extack)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 53d45e029c36..320b3d31fa97 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1675,7 +1675,8 @@ failure:
 	return err;
 }
 
-static int cbq_delete(struct Qdisc *sch, unsigned long arg)
+static int cbq_delete(struct Qdisc *sch, unsigned long arg,
+		      struct netlink_ext_ack *extack)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = (struct cbq_class *)arg;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index dde564670ad8..fc1e47069593 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -146,7 +146,8 @@ static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
 	kfree(cl);
 }
 
-static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
+static int drr_delete_class(struct Qdisc *sch, unsigned long arg,
+			    struct netlink_ext_ack *extack)
 {
 	struct drr_sched *q = qdisc_priv(sch);
 	struct drr_class *cl = (struct drr_class *)arg;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 2b88710994d7..cd2748e2d4a2 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -150,7 +150,8 @@ errout:
 	return err;
 }
 
-static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
+static int dsmark_delete(struct Qdisc *sch, unsigned long arg,
+			 struct netlink_ext_ack *extack)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
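One note on the qdisc_get_rtab() change above: cell_log is used as a shift count when indexing the 256-entry rate table, and shifting a 32-bit value by 32 or more is undefined behavior in C, so oversized values coming from netlink must be rejected before the table is ever consulted. A standalone illustration (the lookup below is simplified, not the exact qdisc_l2t() from the kernel):

#include <stdio.h>

/* Simplified rate-table lookup in the style of qdisc_l2t(): the packet
 * length is shifted right by cell_log to select one of 256 cells.
 */
static unsigned int l2t(const unsigned int *rtab, unsigned int pktlen,
			unsigned int cell_log)
{
	unsigned int slot = pktlen >> cell_log;	/* UB if cell_log >= 32 */

	if (slot > 255)
		slot = 255;
	return rtab[slot];
}

int main(void)
{
	unsigned int rtab[256] = { 0 };
	unsigned int cell_log = 3;

	/* The new sanity check from qdisc_get_rtab(): reject the shift
	 * count before it can be used.
	 */
	if (cell_log == 0 || cell_log >= 32) {
		fprintf(stderr, "invalid cell_log\n");
		return 1;
	}

	printf("1500-byte packet maps to cell %u, cost %u\n",
	       1500u >> cell_log, l2t(rtab, 1500u, cell_log));
	return 0;
}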
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d1902fca9844..bf0034c66e35 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1090,7 +1090,8 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
 }
 
 static int
-hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
+hfsc_delete_class(struct Qdisc *sch, unsigned long arg,
+		  struct netlink_ext_ack *extack)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
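With tclass_del_notify() now passing extack through cops->delete() (sch_api.c above), every classful qdisc's delete handler (atm, cbq, drr, dsmark, hfsc here; htb, qfq, sfb below) gains a netlink_ext_ack parameter. None of the hunks above use it yet, but an implementation now can report why a delete was refused. A hedged kernel-context sketch of the idea, using a hypothetical foo_ qdisc that is not part of this diff:

static int foo_delete_class(struct Qdisc *sch, unsigned long arg,
			    struct netlink_ext_ack *extack)
{
	struct foo_class *cl = (struct foo_class *)arg;

	if (cl->filter_cnt) {
		/* Travels back to userspace alongside -EBUSY. */
		NL_SET_ERR_MSG(extack, "Class is in use by filters");
		return -EBUSY;
	}
	/* ...actual teardown elided... */
	return 0;
}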
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index cd70dbcbd72f..dff3adf5a915 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -114,6 +114,7 @@ struct htb_class {
 	 * Written often fields
 	 */
 	struct gnet_stats_basic_packed bstats;
+	struct gnet_stats_basic_packed bstats_bias;
 	struct tc_htb_xstats xstats;	/* our special stats */
 
 	/* token bucket parameters */
@@ -174,6 +175,11 @@ struct htb_sched {
 	int			row_mask[TC_HTB_MAXDEPTH];
 
 	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
+
+	struct Qdisc		**direct_qdiscs;
+	unsigned int		num_direct_qdiscs;
+
+	bool			offload;
 };
 
 /* find class in global hash table using given handle */
@@ -957,7 +963,7 @@ static void htb_reset(struct Qdisc *sch)
 			if (cl->level)
 				memset(&cl->inner, 0, sizeof(cl->inner));
 			else {
-				if (cl->leaf.q)
+				if (cl->leaf.q && !q->offload)
 					qdisc_reset(cl->leaf.q);
 			}
 			cl->prio_activity = 0;
@@ -980,6 +986,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
 	[TCA_HTB_RATE64] = { .type = NLA_U64 },
 	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
+	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
 };
 
 static void htb_work_func(struct work_struct *work)
@@ -992,12 +999,27 @@ static void htb_work_func(struct work_struct *work)
 	rcu_read_unlock();
 }
 
+static void htb_set_lockdep_class_child(struct Qdisc *q)
+{
+	static struct lock_class_key child_key;
+
+	lockdep_set_class(qdisc_lock(q), &child_key);
+}
+
+static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
+{
+	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
+}
+
 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
 		    struct netlink_ext_ack *extack)
 {
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_htb_qopt_offload offload_opt;
 	struct htb_sched *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_glob *gopt;
+	unsigned int ntx;
 	int err;
 
 	qdisc_watchdog_init(&q->watchdog, sch);
@@ -1022,9 +1044,26 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
 	if (gopt->version != HTB_VER >> 16)
 		return -EINVAL;
 
+	q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
+
+	if (q->offload) {
+		if (sch->parent != TC_H_ROOT)
+			return -EOPNOTSUPP;
+
+		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+			return -EOPNOTSUPP;
+
+		q->num_direct_qdiscs = dev->real_num_tx_queues;
+		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
+					   sizeof(*q->direct_qdiscs),
+					   GFP_KERNEL);
+		if (!q->direct_qdiscs)
+			return -ENOMEM;
+	}
+
 	err = qdisc_class_hash_init(&q->clhash);
 	if (err < 0)
-		return err;
+		goto err_free_direct_qdiscs;
 
 	qdisc_skb_head_init(&q->direct_queue);
@@ -1037,7 +1076,107 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
 		q->rate2quantum = 1;
 	q->defcls = gopt->defcls;
 
+	if (!q->offload)
+		return 0;
+
+	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
+		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+		struct Qdisc *qdisc;
+
+		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
+					  TC_H_MAKE(sch->handle, 0), extack);
+		if (!qdisc) {
+			err = -ENOMEM;
+			goto err_free_qdiscs;
+		}
+
+		htb_set_lockdep_class_child(qdisc);
+		q->direct_qdiscs[ntx] = qdisc;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+	}
+
+	sch->flags |= TCQ_F_MQROOT;
+
+	offload_opt = (struct tc_htb_qopt_offload) {
+		.command = TC_HTB_CREATE,
+		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
+		.classid = TC_H_MIN(q->defcls),
+		.extack = extack,
+	};
+	err = htb_offload(dev, &offload_opt);
+	if (err)
+		goto err_free_qdiscs;
+
 	return 0;
+
+err_free_qdiscs:
+	/* TC_HTB_CREATE call failed, avoid any further calls to the driver. */
+	q->offload = false;
+
+	for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
+	     ntx++)
+		qdisc_put(q->direct_qdiscs[ntx]);
+
+	qdisc_class_hash_destroy(&q->clhash);
+	/* Prevent use-after-free and double-free when htb_destroy gets called.
+	 */
+	q->clhash.hash = NULL;
+	q->clhash.hashsize = 0;
+
+err_free_direct_qdiscs:
+	kfree(q->direct_qdiscs);
+	q->direct_qdiscs = NULL;
+	return err;
+}
+
+static void htb_attach_offload(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct htb_sched *q = qdisc_priv(sch);
+	unsigned int ntx;
+
+	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
+		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
+
+		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+		qdisc_put(old);
+		qdisc_hash_add(qdisc, false);
+	}
+	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
+		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
+
+		qdisc_put(old);
+	}
+
+	kfree(q->direct_qdiscs);
+	q->direct_qdiscs = NULL;
+}
+
+static void htb_attach_software(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned int ntx;
+
+	/* Resemble qdisc_graft behavior. */
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
+
+		qdisc_refcount_inc(sch);
+
+		qdisc_put(old);
+	}
+}
+
+static void htb_attach(struct Qdisc *sch)
+{
+	struct htb_sched *q = qdisc_priv(sch);
+
+	if (q->offload)
+		htb_attach_offload(sch);
+	else
+		htb_attach_software(sch);
 }
 
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -1046,6 +1185,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct nlattr *nest;
 	struct tc_htb_glob gopt;
 
+	if (q->offload)
+		sch->flags |= TCQ_F_OFFLOADED;
+	else
+		sch->flags &= ~TCQ_F_OFFLOADED;
+
 	sch->qstats.overlimits = q->overlimits;
 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
 	 * no change can happen on the qdisc parameters.
@@ -1063,6 +1207,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
 	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
 		goto nla_put_failure;
+	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
+		goto nla_put_failure;
 
 	return nla_nest_end(skb, nest);
 
@@ -1075,6 +1221,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 			  struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
+	struct htb_sched *q = qdisc_priv(sch);
 	struct nlattr *nest;
 	struct tc_htb_opt opt;
 
@@ -1101,6 +1248,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	opt.level = cl->level;
 	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
+	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
+		goto nla_put_failure;
 	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
 	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
 			      TCA_HTB_PAD))
@@ -1117,10 +1266,39 @@ nla_put_failure:
 	return -1;
 }
 
+static void htb_offload_aggregate_stats(struct htb_sched *q,
+					struct htb_class *cl)
+{
+	struct htb_class *c;
+	unsigned int i;
+
+	memset(&cl->bstats, 0, sizeof(cl->bstats));
+
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
+			struct htb_class *p = c;
+
+			while (p && p->level < cl->level)
+				p = p->parent;
+
+			if (p != cl)
+				continue;
+
+			cl->bstats.bytes += c->bstats_bias.bytes;
+			cl->bstats.packets += c->bstats_bias.packets;
+			if (c->level == 0) {
+				cl->bstats.bytes += c->leaf.q->bstats.bytes;
+				cl->bstats.packets += c->leaf.q->bstats.packets;
+			}
+		}
+	}
+}
+
 static int
 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
+	struct htb_sched *q = qdisc_priv(sch);
 	struct gnet_stats_queue qs = {
 		.drops = cl->drops,
 		.overlimits = cl->overlimits,
@@ -1135,6 +1313,19 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
 				     INT_MIN, INT_MAX);
 
+	if (q->offload) {
+		if (!cl->level) {
+			if (cl->leaf.q)
+				cl->bstats = cl->leaf.q->bstats;
+			else
+				memset(&cl->bstats, 0, sizeof(cl->bstats));
+			cl->bstats.bytes += cl->bstats_bias.bytes;
+			cl->bstats.packets += cl->bstats_bias.packets;
+		} else {
+			htb_offload_aggregate_stats(q, cl);
+		}
+	}
+
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
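On the stats hunks just above: with offload enabled, packets bypass the software HTB, so byte/packet counts live on the per-queue leaf qdiscs, and those qdiscs come and go as the class hierarchy changes. bstats_bias preserves the history: when a leaf qdisc is torn down or replaced (see htb_destroy_class_offload() and the TC_HTB_LEAF_TO_INNER path further down), its counters are folded into the parent's bias, and htb_offload_aggregate_stats() sums bias plus live leaf counters for inner classes. A small self-contained model of that invariant, simplified to byte counters only:

#include <assert.h>
#include <stdio.h>

struct counters { unsigned long bytes, packets; };

struct class {
	struct counters bias;	/* bstats_bias: carried-over history */
	struct counters leaf;	/* live leaf qdisc counters */
	int has_leaf;
};

/* Total a class reports: live leaf counters plus accumulated bias,
 * mirroring the !cl->level branch of htb_dump_class_stats().
 */
static unsigned long total_bytes(const struct class *cl)
{
	return cl->bias.bytes + (cl->has_leaf ? cl->leaf.bytes : 0);
}

/* Mirrors the bias fold on the leaf-teardown paths: the leaf qdisc
 * goes away, its history stays with the parent.
 */
static void fold_leaf_into(struct class *parent, struct class *leaf)
{
	parent->bias.bytes += leaf->leaf.bytes;
	parent->bias.packets += leaf->leaf.packets;
	leaf->has_leaf = 0;
}

int main(void)
{
	struct class parent = { { 0, 0 }, { 0, 0 }, 0 };
	struct class leaf = { { 0, 0 }, { 1000, 10 }, 1 };
	unsigned long before = total_bytes(&parent) + total_bytes(&leaf);

	fold_leaf_into(&parent, &leaf);
	assert(total_bytes(&parent) + total_bytes(&leaf) == before);
	puts("counters preserved across leaf teardown");
	return 0;
}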
@@ -1144,19 +1335,97 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
 }
 
+static struct netdev_queue *
+htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_htb_qopt_offload offload_opt;
+	int err;
+
+	offload_opt = (struct tc_htb_qopt_offload) {
+		.command = TC_HTB_LEAF_QUERY_QUEUE,
+		.classid = TC_H_MIN(tcm->tcm_parent),
+	};
+	err = htb_offload(dev, &offload_opt);
+	if (err || offload_opt.qid >= dev->num_tx_queues)
+		return NULL;
+	return netdev_get_tx_queue(dev, offload_opt.qid);
+}
+
+static struct Qdisc *
+htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
+{
+	struct net_device *dev = dev_queue->dev;
+	struct Qdisc *old_q;
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+	old_q = dev_graft_qdisc(dev_queue, new_q);
+	if (new_q)
+		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+
+	return old_q;
+}
+
+static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
+{
+	struct netdev_queue *queue_old, *queue_new;
+	struct net_device *dev = qdisc_dev(sch);
+	struct Qdisc *qdisc;
+
+	queue_old = netdev_get_tx_queue(dev, qid_old);
+	queue_new = netdev_get_tx_queue(dev, qid_new);
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+	qdisc = dev_graft_qdisc(queue_old, NULL);
+	qdisc->dev_queue = queue_new;
+	qdisc = dev_graft_qdisc(queue_new, qdisc);
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+
+	WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
+}
+
 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		     struct Qdisc **old, struct netlink_ext_ack *extack)
 {
+	struct netdev_queue *dev_queue = sch->dev_queue;
 	struct htb_class *cl = (struct htb_class *)arg;
+	struct htb_sched *q = qdisc_priv(sch);
+	struct Qdisc *old_q;
 
 	if (cl->level)
 		return -EINVAL;
-	if (new == NULL &&
-	    (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
-				     cl->common.classid, extack)) == NULL)
-		return -ENOBUFS;
+
+	if (q->offload) {
+		dev_queue = new->dev_queue;
+		WARN_ON(dev_queue != cl->leaf.q->dev_queue);
+	}
+
+	if (!new) {
+		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
+					cl->common.classid, extack);
+		if (!new)
+			return -ENOBUFS;
+	}
+
+	if (q->offload) {
+		htb_set_lockdep_class_child(new);
+		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
+		qdisc_refcount_inc(new);
+		old_q = htb_graft_helper(dev_queue, new);
+	}
 
 	*old = qdisc_replace(sch, new, &cl->leaf.q);
+
+	if (q->offload) {
+		WARN_ON(old_q != *old);
+		qdisc_put(old_q);
+	}
+
 	return 0;
 }
 
@@ -1184,9 +1453,10 @@ static inline int htb_parent_last_child(struct htb_class *cl)
 	return 1;
 }
 
-static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
+static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
 			       struct Qdisc *new_q)
 {
+	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *parent = cl->parent;
 
 	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
@@ -1204,6 +1474,76 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
 	parent->cmode = HTB_CAN_SEND;
 }
 
+static void htb_parent_to_leaf_offload(struct Qdisc *sch,
+				       struct netdev_queue *dev_queue,
+				       struct Qdisc *new_q)
+{
+	struct Qdisc *old_q;
+
+	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
+	qdisc_refcount_inc(new_q);
+	old_q = htb_graft_helper(dev_queue, new_q);
+	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
+}
+
+static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
+				     bool last_child, bool destroying,
+				     struct netlink_ext_ack *extack)
+{
+	struct tc_htb_qopt_offload offload_opt;
+	struct Qdisc *q = cl->leaf.q;
+	struct Qdisc *old = NULL;
+	int err;
+
+	if (cl->level)
+		return -EINVAL;
+
+	WARN_ON(!q);
+	if (!destroying) {
+		/* On destroy of HTB, two cases are possible:
+		 * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
+		 * 2. q is a noop qdisc (for nodes that were inner),
+		 *    q->dev_queue is noop_netdev_queue.
+		 */
+		old = htb_graft_helper(q->dev_queue, NULL);
+		WARN_ON(!old);
+		WARN_ON(old != q);
+	}
+
+	if (cl->parent) {
+		cl->parent->bstats_bias.bytes += q->bstats.bytes;
+		cl->parent->bstats_bias.packets += q->bstats.packets;
+	}
+
+	offload_opt = (struct tc_htb_qopt_offload) {
+		.command = !last_child ? TC_HTB_LEAF_DEL :
+			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
+			   TC_HTB_LEAF_DEL_LAST,
+		.classid = cl->common.classid,
+		.extack = extack,
+	};
+	err = htb_offload(qdisc_dev(sch), &offload_opt);
+
+	if (!err || destroying)
+		qdisc_put(old);
+	else
+		htb_graft_helper(q->dev_queue, old);
+
+	if (last_child)
+		return err;
+
+	if (!err && offload_opt.moved_qid != 0) {
+		if (destroying)
+			q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
+							   offload_opt.qid);
+		else
+			htb_offload_move_qdisc(sch, offload_opt.moved_qid,
+					       offload_opt.qid);
+	}
+
+	return err;
+}
+
 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
 	if (!cl->level) {
@@ -1217,8 +1557,11 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 
 static void htb_destroy(struct Qdisc *sch)
 {
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_htb_qopt_offload offload_opt;
 	struct htb_sched *q = qdisc_priv(sch);
 	struct hlist_node *next;
+	bool nonempty, changed;
 	struct htb_class *cl;
 	unsigned int i;
 
@@ -1237,21 +1580,68 @@ static void htb_destroy(struct Qdisc *sch)
 			cl->block = NULL;
 		}
 	}
-	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
-					  common.hnode)
-			htb_destroy_class(sch, cl);
-	}
+
+	do {
+		nonempty = false;
+		changed = false;
+		for (i = 0; i < q->clhash.hashsize; i++) {
+			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
+						  common.hnode) {
+				bool last_child;
+
+				if (!q->offload) {
+					htb_destroy_class(sch, cl);
+					continue;
+				}
+
+				nonempty = true;
+
+				if (cl->level)
+					continue;
+
+				changed = true;
+
+				last_child = htb_parent_last_child(cl);
+				htb_destroy_class_offload(sch, cl, last_child,
+							  true, NULL);
+				qdisc_class_hash_remove(&q->clhash,
+							&cl->common);
+				if (cl->parent)
+					cl->parent->children--;
+				if (last_child)
+					htb_parent_to_leaf(sch, cl, NULL);
+				htb_destroy_class(sch, cl);
+			}
+		}
+	} while (changed);
+	WARN_ON(nonempty);
+
 	qdisc_class_hash_destroy(&q->clhash);
 	__qdisc_reset_queue(&q->direct_queue);
+
+	if (!q->offload)
+		return;
+
+	offload_opt = (struct tc_htb_qopt_offload) {
+		.command = TC_HTB_DESTROY,
+	};
+	htb_offload(dev, &offload_opt);
+
+	if (!q->direct_qdiscs)
+		return;
+	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
+		qdisc_put(q->direct_qdiscs[i]);
+	kfree(q->direct_qdiscs);
 }
 
-static int htb_delete(struct Qdisc *sch, unsigned long arg)
+static int htb_delete(struct Qdisc *sch, unsigned long arg,
+		      struct netlink_ext_ack *extack)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
 	struct Qdisc *new_q = NULL;
 	int last_child = 0;
+	int err;
 
 	/* TODO: why don't allow to delete subtree ? references ? does
 	 * tc subsys guarantee us that in htb_destroy it holds no class
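The moved_qid/qid handling above is half of a contract with the driver: every command issued through htb_offload() arrives at the device's ndo_setup_tc() with type TC_SETUP_QDISC_HTB and a struct tc_htb_qopt_offload to fill in. A hedged sketch of the driver-side entry point, using a hypothetical foo_ driver that is not part of this diff:

static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
			void *type_data)
{
	struct foo_priv *priv = netdev_priv(dev);

	switch (type) {
	case TC_SETUP_QDISC_HTB:
		/* type_data is the struct tc_htb_qopt_offload built above;
		 * the driver fills .qid for TC_HTB_LEAF_QUERY_QUEUE and
		 * .moved_qid for the leaf-delete commands.
		 */
		return foo_setup_tc_htb(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}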
@@ -1260,11 +1650,28 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 	if (cl->children || cl->filter_cnt)
 		return -EBUSY;
 
-	if (!cl->level && htb_parent_last_child(cl)) {
-		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+	if (!cl->level && htb_parent_last_child(cl))
+		last_child = 1;
+
+	if (q->offload) {
+		err = htb_destroy_class_offload(sch, cl, last_child, false,
+						extack);
+		if (err)
+			return err;
+	}
+
+	if (last_child) {
+		struct netdev_queue *dev_queue;
+
+		dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
+		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
 					  cl->parent->common.classid,
 					  NULL);
-		last_child = 1;
+		if (q->offload) {
+			if (new_q)
+				htb_set_lockdep_class_child(new_q);
+			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
+		}
 	}
 
 	sch_tree_lock(sch);
@@ -1285,7 +1692,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 				  &q->hlevel[cl->level].wait_pq);
 
 	if (last_child)
-		htb_parent_to_leaf(q, cl, new_q);
+		htb_parent_to_leaf(sch, cl, new_q);
 
 	sch_tree_unlock(sch);
 
@@ -1300,9 +1707,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	int err = -EINVAL;
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
+	struct tc_htb_qopt_offload offload_opt;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct Qdisc *parent_qdisc = NULL;
+	struct netdev_queue *dev_queue;
 	struct tc_htb_opt *hopt;
 	u64 rate64, ceil64;
 	int warn = 0;
@@ -1335,8 +1744,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
 				      NULL));
 
+	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+
 	if (!cl) {		/* new class */
-		struct Qdisc *new_q;
+		struct net_device *dev = qdisc_dev(sch);
+		struct Qdisc *new_q, *old_q;
 		int prio;
 		struct {
 			struct nlattr		nla;
@@ -1379,11 +1792,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 						NULL,
 						qdisc_root_sleeping_running(sch),
 						tca[TCA_RATE] ? : &est.nla);
-			if (err) {
-				tcf_block_put(cl->block);
-				kfree(cl);
-				goto failure;
-			}
+			if (err)
+				goto err_block_put;
 		}
 
 		cl->children = 0;
@@ -1392,12 +1802,76 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
 			RB_CLEAR_NODE(&cl->node[prio]);
 
+		cl->common.classid = classid;
+
+		/* Make sure nothing interrupts us in between of two
+		 * ndo_setup_tc calls.
+		 */
+		ASSERT_RTNL();
+
 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 		 * so that can't be used inside of sch_tree_lock
 		 * -- thanks to Karlis Peisenieks
 		 */
-		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+		if (!q->offload) {
+			dev_queue = sch->dev_queue;
+		} else if (!(parent && !parent->level)) {
+			/* Assign a dev_queue to this classid. */
+			offload_opt = (struct tc_htb_qopt_offload) {
+				.command = TC_HTB_LEAF_ALLOC_QUEUE,
+				.classid = cl->common.classid,
+				.parent_classid = parent ?
+					TC_H_MIN(parent->common.classid) :
+					TC_HTB_CLASSID_ROOT,
+				.rate = max_t(u64, hopt->rate.rate, rate64),
+				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
+				.extack = extack,
+			};
+			err = htb_offload(dev, &offload_opt);
+			if (err) {
+				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
+				       err);
+				goto err_kill_estimator;
+			}
+			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
+		} else { /* First child. */
+			dev_queue = parent->leaf.q->dev_queue;
+			old_q = htb_graft_helper(dev_queue, NULL);
+			WARN_ON(old_q != parent->leaf.q);
+			offload_opt = (struct tc_htb_qopt_offload) {
+				.command = TC_HTB_LEAF_TO_INNER,
+				.classid = cl->common.classid,
+				.parent_classid =
+					TC_H_MIN(parent->common.classid),
+				.rate = max_t(u64, hopt->rate.rate, rate64),
+				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
+				.extack = extack,
+			};
+			err = htb_offload(dev, &offload_opt);
+			if (err) {
+				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
+				       err);
+				htb_graft_helper(dev_queue, old_q);
+				goto err_kill_estimator;
+			}
+			parent->bstats_bias.bytes += old_q->bstats.bytes;
+			parent->bstats_bias.packets += old_q->bstats.packets;
+			qdisc_put(old_q);
+		}
+		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
 					  classid, NULL);
+		if (q->offload) {
+			if (new_q) {
+				htb_set_lockdep_class_child(new_q);
+				/* One ref for cl->leaf.q, the other for
+				 * dev_queue->qdisc.
+				 */
+				qdisc_refcount_inc(new_q);
+			}
+			old_q = htb_graft_helper(dev_queue, new_q);
+			/* No qdisc_put needed. */
+			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
+		}
 		sch_tree_lock(sch);
 		if (parent && !parent->level) {
 			/* turn parent into inner node */
@@ -1415,10 +1889,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 					  : TC_HTB_MAXDEPTH) - 1;
 			memset(&parent->inner, 0, sizeof(parent->inner));
 		}
+
 		/* leaf (we) needs elementary qdisc */
 		cl->leaf.q = new_q ? new_q : &noop_qdisc;
 
-		cl->common.classid = classid;
 		cl->parent = parent;
 
 		/* set class to be in HTB_CAN_SEND state */
@@ -1444,12 +1918,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			if (err)
 				return err;
 		}
-		sch_tree_lock(sch);
-	}
 
-	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+		if (q->offload) {
+			struct net_device *dev = qdisc_dev(sch);
+
+			offload_opt = (struct tc_htb_qopt_offload) {
+				.command = TC_HTB_NODE_MODIFY,
+				.classid = cl->common.classid,
+				.rate = max_t(u64, hopt->rate.rate, rate64),
+				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
+				.extack = extack,
+			};
+			err = htb_offload(dev, &offload_opt);
+			if (err)
+				/* Estimator was replaced, and rollback may fail
+				 * as well, so we don't try to recover it, and
+				 * the estimator won't work property with the
+				 * offload anyway, because bstats are updated
+				 * only when the stats are queried.
+				 */
+				return err;
+		}
 
-	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+		sch_tree_lock(sch);
+	}
 
 	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
 	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
@@ -1492,6 +1984,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	*arg = (unsigned long)cl;
 	return 0;
 
+err_kill_estimator:
+	gen_kill_estimator(&cl->rate_est);
+err_block_put:
+	tcf_block_put(cl->block);
+	kfree(cl);
 failure:
 	return err;
 }
@@ -1557,6 +2054,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 }
 
 static const struct Qdisc_class_ops htb_class_ops = {
+	.select_queue	= htb_select_queue,
 	.graft		= htb_graft,
 	.leaf		= htb_leaf,
 	.qlen_notify	= htb_qlen_notify,
@@ -1579,6 +2077,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
 	.dequeue	= htb_dequeue,
 	.peek		= qdisc_peek_dequeued,
 	.init		= htb_init,
+	.attach		= htb_attach,
 	.reset		= htb_reset,
 	.destroy	= htb_destroy,
 	.dump		= htb_dump,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6335230a971e..1db9d4a2ef5e 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -529,7 +529,8 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
 	kfree(cl);
 }
 
-static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
+static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
+			    struct netlink_ext_ack *extack)
 {
 	struct qfq_sched *q = qdisc_priv(sch);
 	struct qfq_class *cl = (struct qfq_class *)arg;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index da047a37a3bf..dde829d4b9f8 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -649,7 +649,8 @@ static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	return -ENOSYS;
 }
 
-static int sfb_delete(struct Qdisc *sch, unsigned long cl)
+static int sfb_delete(struct Qdisc *sch, unsigned long cl,
+		      struct netlink_ext_ack *extack)
 {
 	return -ENOSYS;
 }
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 6f775275826a..8287894541e3 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -241,7 +241,7 @@ static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
 			/* Here, we are just trying to find out the
 			 * first available interval in the next cycle.
 			 */
-			entry_available = 1;
+			entry_available = true;
 			entry_found = entry;
 			*interval_start = ktime_add_ns(curr_intv_start, cycle);
 			*interval_end = ktime_add_ns(curr_intv_end, cycle);
@@ -372,7 +372,7 @@ static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
 	packet_transmit_time = length_to_duration(q, len);
 
 	do {
-		sched_changed = 0;
+		sched_changed = false;
 
 		entry = find_entry_to_transmit(skb, sch, sched, admin,
 					       minimum_time,
@@ -390,7 +390,7 @@ static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
 		if (admin && admin != sched &&
 		    ktime_after(txtime, admin->base_time)) {
 			sched = admin;
-			sched_changed = 1;
+			sched_changed = true;
 			continue;
 		}
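Taken together, the sch_htb.c changes drive an offload-capable driver through a small command lifecycle. A recap of the commands used above and where each fires (a summary of this patch's call sites, not a uapi listing):

/* TC_HTB_CREATE              htb_init(): offloaded root qdisc is created
 * TC_HTB_DESTROY             htb_destroy(): teardown after all classes are gone
 * TC_HTB_LEAF_ALLOC_QUEUE    htb_change_class(): a new leaf gets its own TX queue
 * TC_HTB_LEAF_TO_INNER       htb_change_class(): first child turns a leaf
 *                            into an inner node
 * TC_HTB_LEAF_DEL            htb_destroy_class_offload(): leaf removed; the
 *                            driver may report a moved_qid to compact queues
 * TC_HTB_LEAF_DEL_LAST       as above, when the leaf is its parent's last child
 * TC_HTB_LEAF_DEL_LAST_FORCE same, on the qdisc-destroy path
 * TC_HTB_NODE_MODIFY         htb_change_class(): rate/ceil update on a class
 * TC_HTB_LEAF_QUERY_QUEUE    htb_select_queue(): classid to TX queue lookup
 */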