Diffstat (limited to 'net/sched')
-rw-r--r--   net/sched/Kconfig        |  13
-rw-r--r--   net/sched/act_ct.c       |   2
-rw-r--r--   net/sched/act_mirred.c   |  21
-rw-r--r--   net/sched/act_police.c   |  27
-rw-r--r--   net/sched/act_sample.c   |  27
-rw-r--r--   net/sched/act_vlan.c     |  14
-rw-r--r--   net/sched/cls_api.c      | 666
-rw-r--r--   net/sched/cls_bpf.c      |  38
-rw-r--r--   net/sched/cls_flower.c   | 124
-rw-r--r--   net/sched/cls_matchall.c |  33
-rw-r--r--   net/sched/cls_u32.c      |  29
-rw-r--r--   net/sched/sch_cbs.c      |   2
-rw-r--r--   net/sched/sch_fq_codel.c |  14
-rw-r--r--   net/sched/sch_generic.c  |   3
-rw-r--r--   net/sched/sch_taprio.c   | 413
15 files changed, 999 insertions, 427 deletions
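
Context for the NET_TC_SKB_EXT change in the diff below: when a goto_chain action misses, cls_api now records the last chain index on the skb via skb_ext_add(skb, TC_SKB_EXT), so a consumer (the OvS datapath, or the software tc datapath after a hardware miss) can resume classification in the right chain. As an illustration only — not part of this patch — here is a minimal consumer-side sketch; skb_ext_find(), TC_SKB_EXT and struct tc_skb_ext are existing kernel interfaces, while the function itself is a hypothetical caller-side helper:

/* Sketch (assumption, not from the patch): recover the chain recorded by
 * tcf_classify() on a goto_chain miss, see the skb_ext_add() hunk in
 * net/sched/cls_api.c below.
 */
#include <linux/skbuff.h>

static u32 tc_ext_chain_sketch(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	struct tc_skb_ext *ext = skb_ext_find(skb, TC_SKB_EXT);

	if (ext)
		return ext->chain;	/* chain index set on the tc miss */
#endif
	return 0;			/* no miss recorded: start from chain 0 */
}

A caller would map the returned chain to its own continuation point, e.g. an OvS recirc_id, before re-running classification.
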
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index afd2ba157a13..b3faafeafab9 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX tristate "Support to encoding decoding skb tcindex on IFE action" depends on NET_ACT_IFE +config NET_TC_SKB_EXT + bool "TC recirculation support" + depends on NET_CLS_ACT + default y if NET_CLS_ACT + select SKB_EXTENSIONS + + help + Say Y here to allow tc chain misses to continue in OvS datapath in + the correct recirc_id, and hardware chain misses to continue in + the correct chain in tc software datapath. + + Say N here if you won't be using tc<->ovs offload or tc chains offload. + endif # NET_SCHED config NET_SCH_FIFO diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index cdd6f3818097..fcc46025e790 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -24,12 +24,12 @@ #include <uapi/linux/tc_act/tc_ct.h> #include <net/tc_act/tc_ct.h> -#include <linux/netfilter/nf_nat.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +#include <uapi/linux/netfilter/nf_nat.h> static struct tc_action_ops act_ct_ops; static unsigned int ct_net_id; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 9d1bf508075a..9ce073a05414 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -408,25 +408,31 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; -static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) +static void tcf_mirred_dev_put(void *priv) +{ + struct net_device *dev = priv; + + dev_put(dev); +} + +static struct net_device * +tcf_mirred_get_dev(const struct tc_action *a, + tc_action_priv_destructor *destructor) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(m->tcfm_dev); - if (dev) + if (dev) { dev_hold(dev); + *destructor = tcf_mirred_dev_put; + } rcu_read_unlock(); return dev; } -static void tcf_mirred_put_dev(struct net_device *dev) -{ - dev_put(dev); -} - static size_t tcf_mirred_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_mirred)); @@ -446,7 +452,6 @@ static struct tc_action_ops act_mirred_ops = { .get_fill_size = tcf_mirred_get_fill_size, .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, - .put_dev = tcf_mirred_put_dev, }; static __net_init int mirred_init_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 6315e0f8d26e..89c04c52af3d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -40,6 +40,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE }, [TCA_POLICE_AVRATE] = { .type = NLA_U32 }, [TCA_POLICE_RESULT] = { .type = NLA_U32 }, + [TCA_POLICE_RATE64] = { .type = NLA_U64 }, + [TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 }, }; static int tcf_police_init(struct net *net, struct nlattr *nla, @@ -58,6 +60,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tcf_police_params *new; bool exists = false; u32 index; + u64 rate64, prate64; if (nla == NULL) return -EINVAL; @@ -155,14 +158,18 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } if (R_tab) { new->rate_present = true; - psched_ratecfg_precompute(&new->rate, &R_tab->rate, 0); + rate64 = tb[TCA_POLICE_RATE64] ? 
+ nla_get_u64(tb[TCA_POLICE_RATE64]) : 0; + psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64); qdisc_put_rtab(R_tab); } else { new->rate_present = false; } if (P_tab) { new->peak_present = true; - psched_ratecfg_precompute(&new->peak, &P_tab->rate, 0); + prate64 = tb[TCA_POLICE_PEAKRATE64] ? + nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0; + psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64); qdisc_put_rtab(P_tab); } else { new->peak_present = false; @@ -313,10 +320,22 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, lockdep_is_held(&police->tcf_lock)); opt.mtu = p->tcfp_mtu; opt.burst = PSCHED_NS2TICKS(p->tcfp_burst); - if (p->rate_present) + if (p->rate_present) { psched_ratecfg_getrate(&opt.rate, &p->rate); - if (p->peak_present) + if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_POLICE_RATE64, + police->params->rate.rate_bytes_ps, + TCA_POLICE_PAD)) + goto nla_put_failure; + } + if (p->peak_present) { psched_ratecfg_getrate(&opt.peakrate, &p->peak); + if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64, + police->params->peak.rate_bytes_ps, + TCA_POLICE_PAD)) + goto nla_put_failure; + } if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (p->tcfp_result && diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 10229124a992..692c4c9040fd 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -252,6 +252,32 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static void tcf_psample_group_put(void *priv) +{ + struct psample_group *group = priv; + + psample_group_put(group); +} + +static struct psample_group * +tcf_sample_get_group(const struct tc_action *a, + tc_action_priv_destructor *destructor) +{ + struct tcf_sample *s = to_sample(a); + struct psample_group *group; + + spin_lock_bh(&s->tcf_lock); + group = rcu_dereference_protected(s->psample_group, + lockdep_is_held(&s->tcf_lock)); + if (group) { + psample_group_take(group); + *destructor = tcf_psample_group_put; + } + spin_unlock_bh(&s->tcf_lock); + + return group; +} + static struct tc_action_ops act_sample_ops = { .kind = "sample", .id = TCA_ID_SAMPLE, @@ -262,6 +288,7 @@ static struct tc_action_ops act_sample_ops = { .cleanup = tcf_sample_cleanup, .walk = tcf_sample_walker, .lookup = tcf_sample_search, + .get_psample_group = tcf_sample_get_group, .size = sizeof(struct tcf_sample), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 287a30bf8930..08aaf719a70f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -301,6 +301,19 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } +static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse, bool hw) +{ + struct tcf_vlan *v = to_vlan(a); + struct tcf_t *tm = &v->tcf_tm; + + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + if (hw) + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); + tm->lastuse = max_t(u64, tm->lastuse, lastuse); +} + static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, vlan_net_id); @@ -325,6 +338,7 @@ static struct tc_action_ops act_vlan_ops = { .init = tcf_vlan_init, .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, + .stats_update = tcf_vlan_stats_update, .get_fill_size 
= tcf_vlan_get_fill_size, .lookup = tcf_vlan_search, .size = sizeof(struct tcf_vlan), diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index efd3cfb80a2a..32577c248968 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -36,6 +36,8 @@ #include <net/tc_act/tc_sample.h> #include <net/tc_act/tc_skbedit.h> #include <net/tc_act/tc_ct.h> +#include <net/tc_act/tc_mpls.h> +#include <net/flow_offload.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -544,235 +546,73 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) } } -static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) -{ - const struct Qdisc_class_ops *cops; - struct Qdisc *qdisc; - - if (!dev_ingress_queue(dev)) - return NULL; - - qdisc = dev_ingress_queue(dev)->qdisc_sleeping; - if (!qdisc) - return NULL; - - cops = qdisc->ops->cl_ops; - if (!cops) - return NULL; - - if (!cops->tcf_block) - return NULL; - - return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); -} - -static struct rhashtable indr_setup_block_ht; - -struct tc_indr_block_dev { - struct rhash_head ht_node; - struct net_device *dev; - unsigned int refcnt; - struct list_head cb_list; - struct tcf_block *block; -}; - -struct tc_indr_block_cb { - struct list_head list; - void *cb_priv; - tc_indr_block_bind_cb_t *cb; - void *cb_ident; -}; - -static const struct rhashtable_params tc_indr_setup_block_ht_params = { - .key_offset = offsetof(struct tc_indr_block_dev, dev), - .head_offset = offsetof(struct tc_indr_block_dev, ht_node), - .key_len = sizeof(struct net_device *), -}; - -static struct tc_indr_block_dev * -tc_indr_block_dev_lookup(struct net_device *dev) -{ - return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, - tc_indr_setup_block_ht_params); -} - -static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev) -{ - struct tc_indr_block_dev *indr_dev; - - indr_dev = tc_indr_block_dev_lookup(dev); - if (indr_dev) - goto inc_ref; - - indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); - if (!indr_dev) - return NULL; - - INIT_LIST_HEAD(&indr_dev->cb_list); - indr_dev->dev = dev; - indr_dev->block = tc_dev_ingress_block(dev); - if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params)) { - kfree(indr_dev); - return NULL; - } - -inc_ref: - indr_dev->refcnt++; - return indr_dev; -} - -static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev) -{ - if (--indr_dev->refcnt) - return; - - rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params); - kfree(indr_dev); -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - if (indr_block_cb->cb == cb && - indr_block_cb->cb_ident == cb_ident) - return indr_block_cb; - return NULL; -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (indr_block_cb) - return ERR_PTR(-EEXIST); - - indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); - if (!indr_block_cb) - return ERR_PTR(-ENOMEM); - - indr_block_cb->cb_priv = cb_priv; - indr_block_cb->cb = cb; - indr_block_cb->cb_ident = cb_ident; - list_add(&indr_block_cb->list, &indr_dev->cb_list); 
- - return indr_block_cb; -} - -static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb) -{ - list_del(&indr_block_cb->list); - kfree(indr_block_cb); -} - static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo); -static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev, - struct tc_indr_block_cb *indr_block_cb, +static void tc_indr_block_ing_cmd(struct net_device *dev, + struct tcf_block *block, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, enum flow_block_command command) { struct flow_block_offload bo = { .command = command, .binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, - .net = dev_net(indr_dev->dev), - .block_shared = tcf_block_non_null_shared(indr_dev->block), + .net = dev_net(dev), + .block_shared = tcf_block_non_null_shared(block), }; INIT_LIST_HEAD(&bo.cb_list); - if (!indr_dev->block) + if (!block) return; - bo.block = &indr_dev->block->flow_block; - - indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - tcf_block_setup(indr_dev->block, &bo); -} - -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; - int err; - - indr_dev = tc_indr_block_dev_get(dev); - if (!indr_dev) - return -ENOMEM; - - indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident); - err = PTR_ERR_OR_ZERO(indr_block_cb); - if (err) - goto err_dev_put; + bo.block = &block->flow_block; - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND); - return 0; + down_write(&block->cb_lock); + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); -err_dev_put: - tc_indr_block_dev_put(indr_dev); - return err; + tcf_block_setup(block, &bo); + up_write(&block->cb_lock); } -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register); -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) +static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) { - int err; - - rtnl_lock(); - err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident); - rtnl_unlock(); + const struct Qdisc_class_ops *cops; + struct Qdisc *qdisc; - return err; -} -EXPORT_SYMBOL_GPL(tc_indr_block_cb_register); + if (!dev_ingress_queue(dev)) + return NULL; -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; + qdisc = dev_ingress_queue(dev)->qdisc_sleeping; + if (!qdisc) + return NULL; - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; + cops = qdisc->ops->cl_ops; + if (!cops) + return NULL; - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (!indr_block_cb) - return; + if (!cops->tcf_block) + return NULL; - /* Send unbind message if required to free any block cbs. 
*/ - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND); - tc_indr_block_cb_del(indr_block_cb); - tc_indr_block_dev_put(indr_dev); + return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); } -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister); -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) +static void tc_indr_block_get_and_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) { - rtnl_lock(); - __tc_indr_block_cb_unregister(dev, cb, cb_ident); - rtnl_unlock(); + struct tcf_block *block = tc_dev_ingress_block(dev); + + tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command); } -EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister); -static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, +static void tc_indr_block_call(struct tcf_block *block, + struct net_device *dev, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) { - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; struct flow_block_offload bo = { .command = command, .binder_type = ei->binder_type, @@ -783,22 +623,13 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, }; INIT_LIST_HEAD(&bo.cb_list); - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; - - indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - + flow_indr_block_call(dev, &bo, command); tcf_block_setup(block, &bo); } static bool tcf_block_offload_in_use(struct tcf_block *block) { - return block->offloadcnt; + return atomic_read(&block->offloadcnt); } static int tcf_block_offload_cmd(struct tcf_block *block, @@ -832,6 +663,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); if (!dev->netdev_ops->ndo_setup_tc) goto no_offload_dev_inc; @@ -840,24 +672,31 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, */ if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) { NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_unlock; } err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) - return err; + goto err_unlock; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); + up_write(&block->cb_lock); return 0; no_offload_dev_inc: - if (tcf_block_offload_in_use(block)) - return -EOPNOTSUPP; + if (tcf_block_offload_in_use(block)) { + err = -EOPNOTSUPP; + goto err_unlock; + } + err = 0; block->nooffloaddevcnt++; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); - return 0; +err_unlock: + up_write(&block->cb_lock); + return err; } static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, @@ -866,6 +705,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if (!dev->netdev_ops->ndo_setup_tc) @@ -873,10 +713,12 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if 
(err == -EOPNOTSUPP) goto no_offload_dev_dec; + up_write(&block->cb_lock); return; no_offload_dev_dec: WARN_ON(block->nooffloaddevcnt-- == 0); + up_write(&block->cb_lock); } static int @@ -991,6 +833,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); + init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->owner_list); @@ -1526,6 +1369,8 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, struct tcf_proto *tp, *tp_prev; int err; + lockdep_assert_held(&block->cb_lock); + for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, @@ -1564,6 +1409,8 @@ static int tcf_block_bind(struct tcf_block *block, struct flow_block_cb *block_cb, *next; int err, i = 0; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry(block_cb, &bo->cb_list, list) { err = tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, true, @@ -1571,6 +1418,8 @@ static int tcf_block_bind(struct tcf_block *block, bo->extack); if (err) goto err_unroll; + if (!bo->unlocked_driver_cb) + block->lockeddevcnt++; i++; } @@ -1586,6 +1435,8 @@ err_unroll: block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } flow_block_cb_free(block_cb); } @@ -1598,6 +1449,8 @@ static void tcf_block_unbind(struct tcf_block *block, { struct flow_block_cb *block_cb, *next; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, @@ -1605,6 +1458,8 @@ static void tcf_block_unbind(struct tcf_block *block, NULL); list_del(&block_cb->list); flow_block_cb_free(block_cb); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } } @@ -1659,6 +1514,18 @@ reclassify: goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + { + struct tc_skb_ext *ext; + + ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + + ext->chain = err & TC_ACT_EXT_VAL_MASK; + } +#endif goto reset; } #endif @@ -3151,17 +3018,61 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop) +static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) +{ + if (*flags & TCA_CLS_FLAGS_IN_HW) + return; + *flags |= TCA_CLS_FLAGS_IN_HW; + atomic_inc(&block->offloadcnt); +} + +static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) +{ + if (!(*flags & TCA_CLS_FLAGS_IN_HW)) + return; + *flags &= ~TCA_CLS_FLAGS_IN_HW; + atomic_dec(&block->offloadcnt); +} + +static void tc_cls_offload_cnt_update(struct tcf_block *block, + struct tcf_proto *tp, u32 *cnt, + u32 *flags, u32 diff, bool add) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + if (add) { + if (!*cnt) + tcf_block_offload_inc(block, flags); + *cnt += diff; + } else { + *cnt -= diff; + if (!*cnt) + tcf_block_offload_dec(block, flags); + } + spin_unlock(&tp->lock); +} + +static void +tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, + u32 *cnt, u32 *flags) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + tcf_block_offload_dec(block, flags); + *cnt = 0; + spin_unlock(&tp->lock); +} + +static 
int +__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) { struct flow_block_cb *block_cb; int ok_count = 0; int err; - /* Make sure all netdevs sharing this block are offload-capable. */ - if (block->nooffloaddevcnt && err_stop) - return -EOPNOTSUPP; - list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { @@ -3173,17 +3084,261 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, } return ok_count; } + +int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count; +} EXPORT_SYMBOL(tc_setup_cb_call); +/* Non-destructive filter add. If filter that wasn't already in hardware is + * successfully offloaded, increment block offloads counter. On failure, + * previously offloaded filter is considered to be intact and offloads counter + * is not decremented. + */ + +int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, + ok_count, true); +err_unlock: + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_add); + +/* Destructive filter replace. If filter that wasn't already in hardware is + * successfully offloaded, increment block offload counter. On failure, + * previously offloaded filter is considered to be destroyed and offload counter + * is decremented. 
+ */ + +int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *old_flags, unsigned int *old_in_hw_count, + u32 *new_flags, unsigned int *new_in_hw_count, + bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, new_in_hw_count, + new_flags, ok_count, true); +err_unlock: + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_replace); + +/* Destroy filter and decrement block offload counter, if filter was previously + * offloaded. + */ + +int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? 
ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_destroy); + +int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, + bool add, flow_setup_cb_t *cb, + enum tc_setup_type type, void *type_data, + void *cb_priv, u32 *flags, unsigned int *in_hw_count) +{ + int err = cb(type, type_data, cb_priv); + + if (err) { + if (add && tc_skip_sw(*flags)) + return err; + } else { + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1, + add); + } + + return 0; +} +EXPORT_SYMBOL(tc_setup_cb_reoffload); + +void tc_cleanup_flow_action(struct flow_action *flow_action) +{ + struct flow_action_entry *entry; + int i; + + flow_action_for_each(i, entry, flow_action) + if (entry->destructor) + entry->destructor(entry->destructor_priv); +} +EXPORT_SYMBOL(tc_cleanup_flow_action); + +static void tcf_mirred_get_dev(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->dev = act->ops->get_dev(act, &entry->destructor); + if (!entry->dev) + return; + entry->destructor_priv = entry->dev; +#endif +} + +static void tcf_tunnel_encap_put_tunnel(void *priv) +{ + struct ip_tunnel_info *tunnel = priv; + + kfree(tunnel); +} + +static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->tunnel = tcf_tunnel_info_copy(act); + if (!entry->tunnel) + return -ENOMEM; + entry->destructor = tcf_tunnel_encap_put_tunnel; + entry->destructor_priv = entry->tunnel; + return 0; +} + +static void tcf_sample_get_group(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->sample.psample_group = + act->ops->get_psample_group(act, &entry->destructor); + entry->destructor_priv = entry->sample.psample_group; +#endif +} + int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts) + const struct tcf_exts *exts, bool rtnl_held) { const struct tc_action *act; - int i, j, k; + int i, j, k, err = 0; if (!exts) return 0; + if (!rtnl_held) + rtnl_lock(); + j = 0; tcf_exts_for_each_action(i, act, exts) { struct flow_action_entry *entry; @@ -3200,10 +3355,16 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->chain_index = tcf_gact_goto_chain_index(act); } else if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; - entry->dev = tcf_mirred_dev(act); + tcf_mirred_get_dev(entry, act); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; - entry->dev = tcf_mirred_dev(act); + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT_INGRESS; + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED_INGRESS; + tcf_mirred_get_dev(entry, act); } else if (is_tcf_vlan(act)) { switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: @@ -3222,11 +3383,14 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->vlan.prio = tcf_vlan_push_prio(act); break; default: + err = -EOPNOTSUPP; goto err_out; } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; - entry->tunnel = tcf_tunnel_info(act); + err = tcf_tunnel_encap_get_tunnel(entry, act); + if (err) + goto err_out; } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { @@ -3239,6 +3403,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_ADD; break; default: + err = -EOPNOTSUPP; goto err_out; } entry->mangle.htype = 
tcf_pedit_htype(act, k); @@ -3255,11 +3420,10 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mark = tcf_skbedit_mark(act); } else if (is_tcf_sample(act)) { entry->id = FLOW_ACTION_SAMPLE; - entry->sample.psample_group = - tcf_sample_psample_group(act); entry->sample.trunc_size = tcf_sample_trunc_size(act); entry->sample.truncate = tcf_sample_truncate(act); entry->sample.rate = tcf_sample_rate(act); + tcf_sample_get_group(entry, act); } else if (is_tcf_police(act)) { entry->id = FLOW_ACTION_POLICE; entry->police.burst = tcf_police_tcfp_burst(act); @@ -3269,16 +3433,50 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); + } else if (is_tcf_mpls(act)) { + switch (tcf_mpls_action(act)) { + case TCA_MPLS_ACT_PUSH: + entry->id = FLOW_ACTION_MPLS_PUSH; + entry->mpls_push.proto = tcf_mpls_proto(act); + entry->mpls_push.label = tcf_mpls_label(act); + entry->mpls_push.tc = tcf_mpls_tc(act); + entry->mpls_push.bos = tcf_mpls_bos(act); + entry->mpls_push.ttl = tcf_mpls_ttl(act); + break; + case TCA_MPLS_ACT_POP: + entry->id = FLOW_ACTION_MPLS_POP; + entry->mpls_pop.proto = tcf_mpls_proto(act); + break; + case TCA_MPLS_ACT_MODIFY: + entry->id = FLOW_ACTION_MPLS_MANGLE; + entry->mpls_mangle.label = tcf_mpls_label(act); + entry->mpls_mangle.tc = tcf_mpls_tc(act); + entry->mpls_mangle.bos = tcf_mpls_bos(act); + entry->mpls_mangle.ttl = tcf_mpls_ttl(act); + break; + default: + goto err_out; + } + } else if (is_tcf_skbedit_ptype(act)) { + entry->id = FLOW_ACTION_PTYPE; + entry->ptype = tcf_skbedit_ptype(act); } else { + err = -EOPNOTSUPP; goto err_out; } if (!is_tcf_pedit(act)) j++; } - return 0; + err_out: - return -EOPNOTSUPP; + if (!rtnl_held) + rtnl_unlock(); + + if (err) + tc_cleanup_flow_action(flow_action); + + return err; } EXPORT_SYMBOL(tc_setup_flow_action); @@ -3321,6 +3519,11 @@ static struct pernet_operations tcf_net_ops = { .size = sizeof(struct tcf_net), }; +static struct flow_indr_block_ing_entry block_ing_entry = { + .cb = tc_indr_block_get_and_ing_cmd, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + static int __init tc_filter_init(void) { int err; @@ -3333,10 +3536,7 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; - err = rhashtable_init(&indr_setup_block_ht, - &tc_indr_setup_block_ht_params); - if (err) - goto err_rhash_setup_block_ht; + flow_indr_add_block_ing_cb(&block_ing_entry); rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, RTNL_FLAG_DOIT_UNLOCKED); @@ -3351,8 +3551,6 @@ static int __init tc_filter_init(void) return 0; -err_rhash_setup_block_ht: - unregister_pernet_subsys(&tcf_net_ops); err_register_pernet_subsys: destroy_workqueue(tc_filter_wq); return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 3f7a9c02b70c..bf10bdaf5012 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -163,17 +163,19 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, cls_bpf.exts_integrated = obj->exts_integrated; if (oldprog) - tcf_block_offload_dec(block, &oldprog->gen_flags); + err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &oldprog->gen_flags, + &oldprog->in_hw_count, + &prog->gen_flags, &prog->in_hw_count, + true); + else + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &prog->gen_flags, + &prog->in_hw_count, true); - err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); - if (prog) { - if 
(err < 0) { - cls_bpf_offload_cmd(tp, oldprog, prog, extack); - return err; - } else if (err > 0) { - prog->in_hw_count = err; - tcf_block_offload_inc(block, &prog->gen_flags); - } + if (prog && err) { + cls_bpf_offload_cmd(tp, oldprog, prog, extack); + return err; } if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) @@ -230,7 +232,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false); + tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false, true); } static int cls_bpf_init(struct tcf_proto *tp) @@ -673,15 +675,11 @@ static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv); - if (err) { - if (add && tc_skip_sw(prog->gen_flags)) - return err; - continue; - } - - tc_cls_offload_cnt_update(block, &prog->in_hw_count, - &prog->gen_flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSBPF, + &cls_bpf, cb_priv, &prog->gen_flags, + &prog->in_hw_count); + if (err) + return err; } return 0; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 054123742e32..74221e3351c3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -412,41 +412,27 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; - if (!rtnl_held) - rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = FLOW_CLS_DESTROY; cls_flower.cookie = (unsigned long) f; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); - spin_lock(&tp->lock); - list_del_init(&f->hw_list); - tcf_block_offload_dec(block, &f->flags); - spin_unlock(&tp->lock); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false, + &f->flags, &f->in_hw_count, rtnl_held); - if (!rtnl_held) - rtnl_unlock(); } static int fl_hw_replace_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = fl_head_dereference(tp); struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; bool skip_sw = tc_skip_sw(f->flags); int err = 0; - if (!rtnl_held) - rtnl_lock(); - cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); - if (!cls_flower.rule) { - err = -ENOMEM; - goto errout; - } + if (!cls_flower.rule) + return -ENOMEM; tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = FLOW_CLS_REPLACE; @@ -456,43 +442,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + rtnl_held); if (err) { kfree(cls_flower.rule); - if (skip_sw) + if (skip_sw) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - else - err = 0; - goto errout; + return err; + } + return 0; } - err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, + skip_sw, &f->flags, &f->in_hw_count, rtnl_held); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); - if (err < 0) { - fl_hw_destroy_filter(tp, f, true, 
NULL); - goto errout; - } else if (err > 0) { - f->in_hw_count = err; - err = 0; - spin_lock(&tp->lock); - tcf_block_offload_inc(block, &f->flags); - spin_unlock(&tp->lock); - } - - if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) { - err = -EINVAL; - goto errout; + if (err) { + fl_hw_destroy_filter(tp, f, rtnl_held, NULL); + return err; } - spin_lock(&tp->lock); - list_add(&f->hw_list, &head->hw_filters); - spin_unlock(&tp->lock); -errout: - if (!rtnl_held) - rtnl_unlock(); + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; - return err; + return 0; } static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, @@ -501,22 +475,17 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; - if (!rtnl_held) - rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = FLOW_CLS_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.classid = f->res.classid; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, + rtnl_held); tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, cls_flower.stats.lastused); - - if (!rtnl_held) - rtnl_unlock(); } static void __fl_put(struct cls_fl_filter *f) @@ -1831,7 +1800,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + true); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { @@ -1844,21 +1814,17 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.classid = f->res.classid; - err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, + TC_SETUP_CLSFLOWER, &cls_flower, + cb_priv, &f->flags, + &f->in_hw_count); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { - if (add && tc_skip_sw(f->flags)) { - __fl_put(f); - return err; - } - goto next_flow; + __fl_put(f); + return err; } - - spin_lock(&tp->lock); - tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags, - add); - spin_unlock(&tp->lock); next_flow: __fl_put(f); } @@ -1866,6 +1832,30 @@ next_flow: return 0; } +static void fl_hw_add(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + struct cls_fl_head *head = fl_head_dereference(tp); + + spin_lock(&tp->lock); + list_add(&f->hw_list, &head->hw_filters); + spin_unlock(&tp->lock); +} + +static void fl_hw_del(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + + spin_lock(&tp->lock); + if (!list_empty(&f->hw_list)) + list_del_init(&f->hw_list); + spin_unlock(&tp->lock); +} + static int fl_hw_create_tmplt(struct tcf_chain *chain, struct fl_flow_tmplt *tmplt) { @@ -1886,7 +1876,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain, /* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. 
*/ - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); kfree(cls_flower.rule); return 0; @@ -1902,7 +1892,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain, cls_flower.command = FLOW_CLS_TMPLT_DESTROY; cls_flower.cookie = (unsigned long) tmplt; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); } static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, @@ -2526,6 +2516,8 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .delete = fl_delete, .walk = fl_walk, .reoffload = fl_reoffload, + .hw_add = fl_hw_add, + .hw_del = fl_hw_del, .dump = fl_dump, .bind_class = fl_bind_class, .tmplt_create = fl_tmplt_create, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 455ea2793f9b..7fc2eb62aa98 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -75,8 +75,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); - tcf_block_offload_dec(block, &head->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false, + &head->flags, &head->in_hw_count, true); } static int mall_replace_hw_filter(struct tcf_proto *tp, @@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); @@ -109,15 +109,14 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return err; } - err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, + skip_sw, &head->flags, &head->in_hw_count, true); + tc_cleanup_flow_action(&cls_mall.rule->action); kfree(cls_mall.rule); - if (err < 0) { + if (err) { mall_destroy_hw_filter(tp, head, cookie, NULL); return err; - } else if (err > 0) { - head->in_hw_count = err; - tcf_block_offload_inc(block, &head->flags); } if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) @@ -302,7 +301,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); if (add && tc_skip_sw(head->flags)) { @@ -312,16 +311,14 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, return 0; } - err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, + &cls_mall, cb_priv, &head->flags, + &head->in_hw_count); + tc_cleanup_flow_action(&cls_mall.rule->action); kfree(cls_mall.rule); - if (err) { - if (add && tc_skip_sw(head->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add); + if (err) + return err; return 0; } @@ -337,7 +334,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_STATS; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, 
false); + tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true); tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, cls_mall.stats.pkts, cls_mall.stats.lastused); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 8614088edd1b..a0e6fac613de 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -480,7 +480,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); + tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, @@ -498,7 +498,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true); if (err < 0) { u32_clear_hw_hnode(tp, h, NULL); return err; @@ -522,8 +522,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); - tcf_block_offload_dec(block, &n->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false, + &n->flags, &n->in_hw_count, true); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, @@ -552,13 +552,11 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, if (n->ht_down) cls_u32.knode.link_handle = ht->handle; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); - if (err < 0) { + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw, + &n->flags, &n->in_hw_count, true); + if (err) { u32_remove_hw_knode(tp, n, NULL); return err; - } else if (err > 0) { - n->in_hw_count = err; - tcf_block_offload_inc(block, &n->flags); } if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) @@ -1201,14 +1199,11 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.knode.link_handle = ht->handle; } - err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv); - if (err) { - if (add && tc_skip_sw(n->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32, + &cls_u32, cb_priv, &n->flags, + &n->in_hw_count); + if (err) + return err; return 0; } diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 810645b5c086..93b58fde99b7 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -299,7 +299,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; - int port_rate = -1; + int port_rate; int err; err = __ethtool_get_link_ksettings(dev, &ecmd); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index d59fbcc745d1..c261c0a18868 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -45,7 +45,6 @@ struct fq_codel_flow { struct sk_buff *tail; struct list_head flowchain; int deficit; - u32 dropped; /* number of drops (or ECN marks) on this flow */ struct codel_vars cvars; }; /* please try to keep this structure <= 64 bytes */ @@ -173,7 +172,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, __qdisc_drop(skb, to_free); } while (++i < max_packets && len < 
threshold); - flow->dropped += i; + /* Tell codel to increase its signal strength also */ + flow->cvars.count += i; q->backlogs[idx] -= len; q->memory_usage -= mem; sch->qstats.drops += i; @@ -211,7 +211,6 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, list_add_tail(&flow->flowchain, &q->new_flows); q->new_flow_count++; flow->deficit = q->quantum; - flow->dropped = 0; } get_codel_cb(skb)->mem_usage = skb->truesize; q->memory_usage += get_codel_cb(skb)->mem_usage; @@ -286,7 +285,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) struct sk_buff *skb; struct fq_codel_flow *flow; struct list_head *head; - u32 prev_drop_count, prev_ecn_mark; begin: head = &q->new_flows; @@ -303,16 +301,10 @@ begin: goto begin; } - prev_drop_count = q->cstats.drop_count; - prev_ecn_mark = q->cstats.ecn_mark; - skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams, &flow->cvars, &q->cstats, qdisc_pkt_len, codel_get_enqueue_time, drop_func, dequeue_func); - flow->dropped += q->cstats.drop_count - prev_drop_count; - flow->dropped += q->cstats.ecn_mark - prev_ecn_mark; - if (!skb) { /* force a pass through old_flows to prevent starvation */ if ((head == &q->new_flows) && !list_empty(&q->old_flows)) @@ -658,7 +650,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch_tree_unlock(sch); } qs.backlog = q->backlogs[idx]; - qs.drops = flow->dropped; + qs.drops = 0; } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ac28f6a5d70e..17bd8f539bc7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -985,6 +985,9 @@ static void qdisc_destroy(struct Qdisc *qdisc) void qdisc_put(struct Qdisc *qdisc) { + if (!qdisc) + return; + if (qdisc->flags & TCQ_F_BUILTIN || !refcount_dec_and_test(&qdisc->refcnt)) return; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8d8bc2ec5cd6..2f7b34205c82 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -29,8 +29,8 @@ static DEFINE_SPINLOCK(taprio_list_lock); #define TAPRIO_ALL_GATES_OPEN -1 -#define FLAGS_VALID(flags) (!((flags) & ~TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)) #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) +#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) struct sched_entry { struct list_head list; @@ -75,9 +75,16 @@ struct taprio_sched { struct sched_gate_list __rcu *admin_sched; struct hrtimer advance_timer; struct list_head taprio_list; + struct sk_buff *(*dequeue)(struct Qdisc *sch); + struct sk_buff *(*peek)(struct Qdisc *sch); u32 txtime_delay; }; +struct __tc_taprio_qopt_offload { + refcount_t users; + struct tc_taprio_qopt_offload offload; +}; + static ktime_t sched_base_time(const struct sched_gate_list *sched) { if (!sched) @@ -268,6 +275,19 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch) return entry; } +static bool taprio_flags_valid(u32 flags) +{ + /* Make sure no other flag bits are set. */ + if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | + TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) + return false; + /* txtime-assist and full offload are mutually exclusive */ + if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && + (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) + return false; + return true; +} + /* This returns the tstamp value set by TCP in terms of the set clock. 
*/ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) { @@ -417,7 +437,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_enqueue(skb, child, to_free); } -static struct sk_buff *taprio_peek(struct Qdisc *sch) +static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -461,6 +481,36 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) return NULL; } +static struct sk_buff *taprio_peek_offload(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + + if (unlikely(!child)) + continue; + + skb = child->ops->peek(child); + if (!skb) + continue; + + return skb; + } + + return NULL; +} + +static struct sk_buff *taprio_peek(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + + return q->peek(sch); +} + static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) { atomic_set(&entry->budget, @@ -468,7 +518,7 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) atomic64_read(&q->picos_per_byte))); } -static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -550,6 +600,40 @@ done: return skb; } +static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + + if (unlikely(!child)) + continue; + + skb = child->ops->dequeue(child); + if (unlikely(!skb)) + continue; + + qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + + return skb; + } + + return NULL; +} + +static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + + return q->dequeue(sch); +} + static bool should_restart_cycle(const struct sched_gate_list *oper, const struct sched_entry *entry) { @@ -672,10 +756,6 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, }; -static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { - [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, -}; - static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -936,6 +1016,9 @@ static void taprio_start_sched(struct Qdisc *sch, struct taprio_sched *q = qdisc_priv(sch); ktime_t expires; + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) + return; + expires = hrtimer_get_expires(&q->advance_timer); if (expires == 0) expires = KTIME_MAX; @@ -1015,6 +1098,254 @@ static void setup_txtime(struct taprio_sched *q, } } +static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries) +{ + size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries + + sizeof(struct __tc_taprio_qopt_offload); + struct __tc_taprio_qopt_offload *__offload; + + __offload = kzalloc(size, GFP_KERNEL); + if (!__offload) + return NULL; + + refcount_set(&__offload->users, 1); + + return &__offload->offload; +} + +struct tc_taprio_qopt_offload *taprio_offload_get(struct 
tc_taprio_qopt_offload
+						  *offload)
+{
+	struct __tc_taprio_qopt_offload *__offload;
+
+	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
+				 offload);
+
+	refcount_inc(&__offload->users);
+
+	return offload;
+}
+EXPORT_SYMBOL_GPL(taprio_offload_get);
+
+void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
+{
+	struct __tc_taprio_qopt_offload *__offload;
+
+	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
+				 offload);
+
+	if (!refcount_dec_and_test(&__offload->users))
+		return;
+
+	kfree(__offload);
+}
+EXPORT_SYMBOL_GPL(taprio_offload_free);
+
+/* The function will only serve to keep the pointers to the "oper" and "admin"
+ * schedules valid in relation to their base times, so when calling dump() the
+ * user looks at the right schedules.
+ * When using full offload, the admin configuration is promoted to oper at the
+ * base_time in the PHC time domain. But because the system time is not
+ * necessarily in sync with that, we can't just trigger a hrtimer to call
+ * switch_schedules at the right hardware time.
+ * At the moment we call this by hand right away from taprio, but in the future
+ * it will be useful to create a mechanism for drivers to notify taprio of the
+ * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
+ * This is left as TODO.
+ */
+void taprio_offload_config_changed(struct taprio_sched *q)
+{
+	struct sched_gate_list *oper, *admin;
+
+	spin_lock(&q->current_entry_lock);
+
+	oper = rcu_dereference_protected(q->oper_sched,
+					 lockdep_is_held(&q->current_entry_lock));
+	admin = rcu_dereference_protected(q->admin_sched,
+					  lockdep_is_held(&q->current_entry_lock));
+
+	switch_schedules(q, &admin, &oper);
+
+	spin_unlock(&q->current_entry_lock);
+}
+
+static void taprio_sched_to_offload(struct taprio_sched *q,
+				    struct sched_gate_list *sched,
+				    const struct tc_mqprio_qopt *mqprio,
+				    struct tc_taprio_qopt_offload *offload)
+{
+	struct sched_entry *entry;
+	int i = 0;
+
+	offload->base_time = sched->base_time;
+	offload->cycle_time = sched->cycle_time;
+	offload->cycle_time_extension = sched->cycle_time_extension;
+
+	list_for_each_entry(entry, &sched->entries, list) {
+		struct tc_taprio_sched_entry *e = &offload->entries[i];
+
+		e->command = entry->command;
+		e->interval = entry->interval;
+		e->gate_mask = entry->gate_mask;
+		i++;
+	}
+
+	offload->num_entries = i;
+}
+
+static int taprio_enable_offload(struct net_device *dev,
+				 struct tc_mqprio_qopt *mqprio,
+				 struct taprio_sched *q,
+				 struct sched_gate_list *sched,
+				 struct netlink_ext_ack *extack)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct tc_taprio_qopt_offload *offload;
+	int err = 0;
+
+	if (!ops->ndo_setup_tc) {
+		NL_SET_ERR_MSG(extack,
+			       "Device does not support taprio offload");
+		return -EOPNOTSUPP;
+	}
+
+	offload = taprio_offload_alloc(sched->num_entries);
+	if (!offload) {
+		NL_SET_ERR_MSG(extack,
+			       "Not enough memory for enabling offload mode");
+		return -ENOMEM;
+	}
+	offload->enable = 1;
+	taprio_sched_to_offload(q, sched, mqprio, offload);
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Device failed to setup taprio offload");
+		goto done;
+	}
+
+	taprio_offload_config_changed(q);
+
+done:
+	taprio_offload_free(offload);
+
+	return err;
+}
+
+static int taprio_disable_offload(struct net_device *dev,
+				  struct taprio_sched *q,
+				  struct netlink_ext_ack *extack)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct tc_taprio_qopt_offload *offload;
+	int err;
+
+	if (!FULL_OFFLOAD_IS_ENABLED(q->flags))
+		return 0;
+
+	if (!ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	offload = taprio_offload_alloc(0);
+	if (!offload) {
+		NL_SET_ERR_MSG(extack,
+			       "Not enough memory to disable offload mode");
+		return -ENOMEM;
+	}
+	offload->enable = 0;
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Device failed to disable offload");
+		goto out;
+	}
+
+out:
+	taprio_offload_free(offload);
+
+	return err;
+}
+
+/* If full offload is enabled, the only possible clockid is the net device's
+ * PHC. For that reason, specifying a clockid through netlink is incorrect.
+ * For txtime-assist, it is implicitly assumed that the device's PHC is kept
+ * in sync with the specified clockid via a user space daemon such as phc2sys.
+ * For both software taprio and txtime-assist, the clockid is used for the
+ * hrtimer that advances the schedule and is hence mandatory.
+ */
+static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
+				struct netlink_ext_ack *extack)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	int err = -EINVAL;
+
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		const struct ethtool_ops *ops = dev->ethtool_ops;
+		struct ethtool_ts_info info = {
+			.cmd = ETHTOOL_GET_TS_INFO,
+			.phc_index = -1,
+		};
+
+		if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+			NL_SET_ERR_MSG(extack,
+				       "The 'clockid' cannot be specified for full offload");
+			goto out;
+		}
+
+		if (ops && ops->get_ts_info)
+			err = ops->get_ts_info(dev, &info);
+
+		if (err || info.phc_index < 0) {
+			NL_SET_ERR_MSG(extack,
+				       "Device does not have a PTP clock");
+			err = -ENOTSUPP;
+			goto out;
+		}
+	} else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+		int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
+
+		/* We only support static clockids and we don't allow
+		 * for it to be modified after the first init.
+		 */
+		if (clockid < 0 ||
+		    (q->clockid != -1 && q->clockid != clockid)) {
+			NL_SET_ERR_MSG(extack,
+				       "Changing the 'clockid' of a running schedule is not supported");
+			err = -ENOTSUPP;
+			goto out;
+		}
+
+		switch (clockid) {
+		case CLOCK_REALTIME:
+			q->tk_offset = TK_OFFS_REAL;
+			break;
+		case CLOCK_MONOTONIC:
+			q->tk_offset = TK_OFFS_MAX;
+			break;
+		case CLOCK_BOOTTIME:
+			q->tk_offset = TK_OFFS_BOOT;
+			break;
+		case CLOCK_TAI:
+			q->tk_offset = TK_OFFS_TAI;
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+			err = -EINVAL;
+			goto out;
+		}
+
+		q->clockid = clockid;
+	} else {
+		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
+		goto out;
+	}
+out:
+	return err;
+}
+
 static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 			 struct netlink_ext_ack *extack)
 {
@@ -1024,9 +1355,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	struct net_device *dev = qdisc_dev(sch);
 	struct tc_mqprio_qopt *mqprio = NULL;
 	u32 taprio_flags = 0;
-	int i, err, clockid;
 	unsigned long flags;
 	ktime_t start;
+	int i, err;
 
 	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
 					  taprio_policy, extack);
@@ -1042,7 +1373,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	if (q->flags != 0 && q->flags != taprio_flags) {
 		NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
 		return -EOPNOTSUPP;
-	} else if (!FLAGS_VALID(taprio_flags)) {
+	} else if (!taprio_flags_valid(taprio_flags)) {
 		NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
 		return -EINVAL;
 	}
@@ -1082,30 +1413,19 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		goto free_sched;
 	}
 
-	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
-		clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
-
-		/* We only support static clockids and we don't allow
-		 * for it to be modified after the first init.
-		 */
-		if (clockid < 0 ||
-		    (q->clockid != -1 && q->clockid != clockid)) {
-			NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported");
-			err = -ENOTSUPP;
-			goto free_sched;
-		}
-
-		q->clockid = clockid;
-	}
-
-	if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
-		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
-		err = -EINVAL;
+	err = taprio_parse_clockid(sch, tb, extack);
+	if (err < 0)
 		goto free_sched;
-	}
 
 	taprio_set_picos_per_byte(dev, q);
 
+	if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+		err = taprio_enable_offload(dev, mqprio, q, new_admin, extack);
+	else
+		err = taprio_disable_offload(dev, q, extack);
+	if (err)
+		goto free_sched;
+
 	/* Protects against enqueue()/dequeue() */
 	spin_lock_bh(qdisc_lock(sch));
 
@@ -1120,6 +1440,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
+	    !FULL_OFFLOAD_IS_ENABLED(taprio_flags) &&
 	    !hrtimer_active(&q->advance_timer)) {
 		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
 		q->advance_timer.function = advance_sched;
@@ -1138,23 +1459,15 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 					       mqprio->prio_tc_map[i]);
 	}
 
-	switch (q->clockid) {
-	case CLOCK_REALTIME:
-		q->tk_offset = TK_OFFS_REAL;
-		break;
-	case CLOCK_MONOTONIC:
-		q->tk_offset = TK_OFFS_MAX;
-		break;
-	case CLOCK_BOOTTIME:
-		q->tk_offset = TK_OFFS_BOOT;
-		break;
-	case CLOCK_TAI:
-		q->tk_offset = TK_OFFS_TAI;
-		break;
-	default:
-		NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
-		err = -EINVAL;
-		goto unlock;
+	if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) {
+		q->dequeue = taprio_dequeue_offload;
+		q->peek = taprio_peek_offload;
+	} else {
+		/* Be sure to always keep the function pointers
+		 * in a consistent state.
+		 */
+		q->dequeue = taprio_dequeue_soft;
+		q->peek = taprio_peek_soft;
 	}
 
 	err = taprio_get_start_time(sch, new_admin, &start);
@@ -1216,6 +1529,8 @@ static void taprio_destroy(struct Qdisc *sch)
 
 	hrtimer_cancel(&q->advance_timer);
 
+	taprio_disable_offload(dev, q, NULL);
+
 	if (q->qdiscs) {
 		for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
 			qdisc_put(q->qdiscs[i]);
@@ -1245,6 +1560,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
 	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
 	q->advance_timer.function = advance_sched;
 
+	q->dequeue = taprio_dequeue_soft;
+	q->peek = taprio_peek_soft;
+
 	q->root = sch;
 
 	/* We only support static clockids. Use an invalid value as default
@@ -1427,7 +1745,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
 		goto options_error;
 
-	if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
+	if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
+	    nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
 		goto options_error;
 
 	if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
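Editor's note: the reference-counted lifetime of the offload object is easier to follow with the wrapper spelled out. The container_of() calls and the 'users' field in taprio_offload_get()/taprio_offload_free() imply that the refcount lives in a private struct __tc_taprio_qopt_offload that embeds the structure handed to drivers. The allocator below is only an illustration of that assumed layout (taprio_offload_alloc() itself is defined earlier in the patch and is not shown in this hunk), and it assumes tc_taprio_qopt_offload ends in a flexible array of entries, as the offload->entries[i] accesses above suggest.

#include <linux/refcount.h>
#include <linux/slab.h>
#include <net/pkt_sched.h>

/* Assumed layout: the refcount lives in a wrapper private to sch_taprio.c,
 * while the qdisc and drivers only ever see the embedded offload structure.
 */
struct __tc_taprio_qopt_offload {
	refcount_t users;
	struct tc_taprio_qopt_offload offload;
};

/* Illustrative allocator: room for one gate control list entry per schedule
 * entry, refcount initialised to 1 for the caller's own reference.
 */
static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
{
	struct __tc_taprio_qopt_offload *__offload;
	size_t size = sizeof(*__offload) +
		      num_entries * sizeof(struct tc_taprio_sched_entry);

	__offload = kzalloc(size, GFP_KERNEL);
	if (!__offload)
		return NULL;

	refcount_set(&__offload->users, 1);

	/* Callers get the embedded member; taprio_offload_get() and
	 * taprio_offload_free() recover the wrapper with container_of().
	 */
	return &__offload->offload;
}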
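taprio_enable_offload() builds a tc_taprio_qopt_offload, hands it to ndo_setup_tc() with TC_SETUP_QDISC_TAPRIO, and then drops its reference with taprio_offload_free(). A driver that needs the schedule after the callback returns is therefore expected to take its own reference with taprio_offload_get(). The sketch below shows one plausible driver-side handler; the foo_* names, FOO_MAX_GCL_ENTRIES and struct foo_priv are invented for illustration, and only taprio_offload_get()/taprio_offload_free(), TC_SETUP_QDISC_TAPRIO and the offload fields used here come from the patch (assuming their declarations are exported through <net/pkt_sched.h> as part of this series).

#include <linux/ktime.h>
#include <linux/netdevice.h>
#include <net/pkt_sched.h>

#define FOO_MAX_GCL_ENTRIES 256		/* hypothetical hardware limit */

/* Hypothetical driver private data; only the field used here is shown. */
struct foo_priv {
	struct tc_taprio_qopt_offload *taprio;	/* ref taken via taprio_offload_get() */
};

/* Hypothetical hardware accessors, stubbed out so the sketch is self-contained. */
static void foo_hw_disable_est(struct foo_priv *priv) { }
static void foo_hw_write_gcl_entry(struct foo_priv *priv, int idx,
				   u32 gate_mask, u32 interval) { }
static void foo_hw_set_cycle(struct foo_priv *priv, ktime_t base_time,
			     u64 cycle_time, u64 cycle_time_extension) { }

static int foo_setup_taprio(struct foo_priv *priv,
			    struct tc_taprio_qopt_offload *offload)
{
	size_t i;

	if (!offload->enable) {
		/* enable == 0: tear the schedule down and drop our reference */
		foo_hw_disable_est(priv);
		if (priv->taprio) {
			taprio_offload_free(priv->taprio);
			priv->taprio = NULL;
		}
		return 0;
	}

	if (offload->num_entries > FOO_MAX_GCL_ENTRIES)
		return -EINVAL;

	for (i = 0; i < offload->num_entries; i++) {
		const struct tc_taprio_sched_entry *e = &offload->entries[i];

		/* One gate control list entry: which gates are open
		 * (gate_mask) and for how long (interval).
		 */
		foo_hw_write_gcl_entry(priv, i, e->gate_mask, e->interval);
	}

	foo_hw_set_cycle(priv, offload->base_time, offload->cycle_time,
			 offload->cycle_time_extension);

	/* Keep the schedule valid after ndo_setup_tc() returns; taprio drops
	 * its own reference once the callback is done.
	 */
	if (priv->taprio)
		taprio_offload_free(priv->taprio);
	priv->taprio = taprio_offload_get(offload);

	return 0;
}

static int foo_ndo_setup_tc(struct net_device *dev, enum tc_setup_type type,
			    void *type_data)
{
	struct foo_priv *priv = netdev_priv(dev);

	switch (type) {
	case TC_SETUP_QDISC_TAPRIO:
		return foo_setup_taprio(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}

On enable the driver programs the gate control list and keeps a reference for as long as it needs the schedule; on disable it tears the hardware schedule down and drops the reference it held.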
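One detail in taprio_parse_clockid() that is easy to misread is CLOCK_MONOTONIC mapping to TK_OFFS_MAX. TK_OFFS_MAX is not a real timekeeping offset; it acts as a marker meaning "no conversion needed", since monotonic time is the base the other offsets are applied to. Below is a minimal sketch of how such a stored offset can be consumed; the helper name taprio_clock_now() is hypothetical, and the real lookup lives elsewhere in sch_taprio.c.

#include <linux/ktime.h>
#include <linux/timekeeping.h>

/* Sketch: return "now" in the clock base chosen at configuration time.
 * TK_OFFS_MAX doubles as the CLOCK_MONOTONIC marker, meaning the raw
 * monotonic time is returned unchanged; otherwise the stored offset is
 * applied to convert into the requested clock domain.
 */
static ktime_t taprio_clock_now(enum tk_offsets tk_offset)
{
	ktime_t mono = ktime_get();

	if (tk_offset == TK_OFFS_MAX)
		return mono;

	return ktime_mono_to_any(mono, tk_offset);
}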
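The q->dequeue/q->peek assignments in taprio_change() and taprio_init() suggest that the hot path dispatches through per-qdisc function pointers instead of testing the offload flags on every packet, which is why the patch is careful to keep both pointers in a consistent state. The dispatch functions themselves are outside this hunk, so the sketch below is an assumption about how those pointers are consumed, not the patch's actual code; it also assumes the taprio_sched fields added elsewhere in this patch.

#include <net/sch_generic.h>

/* Assumed hot-path dispatch through the pointers set in taprio_change() and
 * taprio_init(); with full offload the hardware runs the gate schedule, so
 * the _offload variants can skip the software gate/budget checks.
 */
static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);

	return q->dequeue(sch);
}

static struct sk_buff *taprio_peek(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);

	return q->peek(sch);
}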