summaryrefslogtreecommitdiff
path: root/net/sched/sch_taprio.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-24 08:47:44 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-24 08:47:44 +0300
commit50b825d7e87f4cff7070df6eb26390152bb29537 (patch)
treeec82aba49ab0c4743266ff37e18c8304a0367d06 /net/sched/sch_taprio.c
parenta97a2d4d56ea596871b739d63d41b084733bd9fb (diff)
parent3f80e08f40cdb308589a49077c87632fa4508b21 (diff)
downloadlinux-50b825d7e87f4cff7070df6eb26390152bb29537.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Add VF IPSEC offload support in ixgbe, from Shannon Nelson. 2) Add zero-copy AF_XDP support to i40e, from Björn Töpel. 3) All in-tree drivers are converted to {g,s}et_link_ksettings() so we can get rid of the {g,s}et_settings ethtool callbacks, from Michal Kubecek. 4) Add software timestamping to veth driver, from Michael Walle. 5) More work to make packet classifiers and actions lockless, from Vlad Buslov. 6) Support sticky FDB entries in bridge, from Nikolay Aleksandrov. 7) Add ipv6 version of IP_MULTICAST_ALL sockopt, from Andre Naujoks. 8) Support batching of XDP buffers in vhost_net, from Jason Wang. 9) Add flow dissector BPF hook, from Petar Penkov. 10) i40e vf --> generic iavf conversion, from Jesse Brandeburg. 11) Add NLA_REJECT netlink attribute policy type, to signal when users provide attributes in situations which don't make sense. From Johannes Berg. 12) Switch TCP and fair-queue scheduler over to earliest departure time model. From Eric Dumazet. 13) Improve guest receive performance by doing rx busy polling in tx path of vhost networking driver, from Tonghao Zhang. 14) Add per-cgroup local storage to bpf 15) Add reference tracking to BPF, from Joe Stringer. The verifier can now make sure that references taken to objects are properly released by the program. 16) Support in-place encryption in TLS, from Vakul Garg. 17) Add new taprio packet scheduler, from Vinicius Costa Gomes. 18) Lots of selftests additions, too numerous to mention one by one here but all of which are very much appreciated. 19) Support offloading of eBPF programs containing BPF to BPF calls in nfp driver, frm Quentin Monnet. 20) Move dpaa2_ptp driver out of staging, from Yangbo Lu. 21) Lots of u32 classifier cleanups and simplifications, from Al Viro. 22) Add new strict versions of netlink message parsers, and enable them for some situations. From David Ahern. 23) Evict neighbour entries on carrier down, also from David Ahern. 24) Support BPF sk_msg verdict programs with kTLS, from Daniel Borkmann and John Fastabend. 25) Add support for filtering route dumps, from David Ahern. 26) New igc Intel driver for 2.5G parts, from Sasha Neftin et al. 27) Allow vxlan enslavement to bridges in mlxsw driver, from Ido Schimmel. 28) Add queue and stack map types to eBPF, from Mauricio Vasquez B. 29) Add back byte-queue-limit support to r8169, with all the bug fixes in other areas of the driver it works now! From Florian Westphal and Heiner Kallweit. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2147 commits) tcp: add tcp_reset_xmit_timer() helper qed: Fix static checker warning Revert "be2net: remove desc field from be_eq_obj" Revert "net: simplify sock_poll_wait" net: socionext: Reset tx queue in ndo_stop net: socionext: Add dummy PHY register read in phy_write() net: socionext: Stop PHY before resetting netsec net: stmmac: Set OWN bit for jumbo frames arm64: dts: stratix10: Support Ethernet Jumbo frame tls: Add maintainers net: ethernet: ti: cpsw: unsync mcast entries while switch promisc mode octeontx2-af: Support for NIXLF's UCAST/PROMISC/ALLMULTI modes octeontx2-af: Support for setting MAC address octeontx2-af: Support for changing RSS algorithm octeontx2-af: NIX Rx flowkey configuration for RSS octeontx2-af: Install ucast and bcast pkt forwarding rules octeontx2-af: Add LMAC channel info to NIXLF_ALLOC response octeontx2-af: NPC MCAM and LDATA extract minimal configuration octeontx2-af: Enable packet length and csum validation octeontx2-af: Support for VTAG strip and capture ...
Diffstat (limited to 'net/sched/sch_taprio.c')
-rw-r--r--net/sched/sch_taprio.c962
1 files changed, 962 insertions, 0 deletions
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
new file mode 100644
index 000000000000..206e4dbed12f
--- /dev/null
+++ b/net/sched/sch_taprio.c
@@ -0,0 +1,962 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* net/sched/sch_taprio.c Time Aware Priority Scheduler
+ *
+ * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/sch_generic.h>
+
+#define TAPRIO_ALL_GATES_OPEN -1
+
+struct sched_entry {
+ struct list_head list;
+
+ /* The instant that this entry "closes" and the next one
+ * should open, the qdisc will make some effort so that no
+ * packet leaves after this time.
+ */
+ ktime_t close_time;
+ atomic_t budget;
+ int index;
+ u32 gate_mask;
+ u32 interval;
+ u8 command;
+};
+
+struct taprio_sched {
+ struct Qdisc **qdiscs;
+ struct Qdisc *root;
+ s64 base_time;
+ int clockid;
+ int picos_per_byte; /* Using picoseconds because for 10Gbps+
+ * speeds it's sub-nanoseconds per byte
+ */
+ size_t num_entries;
+
+ /* Protects the update side of the RCU protected current_entry */
+ spinlock_t current_entry_lock;
+ struct sched_entry __rcu *current_entry;
+ struct list_head entries;
+ ktime_t (*get_time)(void);
+ struct hrtimer advance_timer;
+};
+
+static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct Qdisc *child;
+ int queue;
+
+ queue = skb_get_queue_mapping(skb);
+
+ child = q->qdiscs[queue];
+ if (unlikely(!child))
+ return qdisc_drop(skb, sch, to_free);
+
+ qdisc_qstats_backlog_inc(sch, skb);
+ sch->q.qlen++;
+
+ return qdisc_enqueue(skb, child, to_free);
+}
+
+static struct sk_buff *taprio_peek(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sched_entry *entry;
+ struct sk_buff *skb;
+ u32 gate_mask;
+ int i;
+
+ rcu_read_lock();
+ entry = rcu_dereference(q->current_entry);
+ gate_mask = entry ? entry->gate_mask : -1;
+ rcu_read_unlock();
+
+ if (!gate_mask)
+ return NULL;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct Qdisc *child = q->qdiscs[i];
+ int prio;
+ u8 tc;
+
+ if (unlikely(!child))
+ continue;
+
+ skb = child->ops->peek(child);
+ if (!skb)
+ continue;
+
+ prio = skb->priority;
+ tc = netdev_get_prio_tc_map(dev, prio);
+
+ if (!(gate_mask & BIT(tc)))
+ return NULL;
+
+ return skb;
+ }
+
+ return NULL;
+}
+
+static inline int length_to_duration(struct taprio_sched *q, int len)
+{
+ return (len * q->picos_per_byte) / 1000;
+}
+
+static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sched_entry *entry;
+ struct sk_buff *skb;
+ u32 gate_mask;
+ int i;
+
+ rcu_read_lock();
+ entry = rcu_dereference(q->current_entry);
+ /* if there's no entry, it means that the schedule didn't
+ * start yet, so force all gates to be open, this is in
+ * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5
+ * "AdminGateSates"
+ */
+ gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
+ rcu_read_unlock();
+
+ if (!gate_mask)
+ return NULL;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct Qdisc *child = q->qdiscs[i];
+ ktime_t guard;
+ int prio;
+ int len;
+ u8 tc;
+
+ if (unlikely(!child))
+ continue;
+
+ skb = child->ops->peek(child);
+ if (!skb)
+ continue;
+
+ prio = skb->priority;
+ tc = netdev_get_prio_tc_map(dev, prio);
+
+ if (!(gate_mask & BIT(tc)))
+ continue;
+
+ len = qdisc_pkt_len(skb);
+ guard = ktime_add_ns(q->get_time(),
+ length_to_duration(q, len));
+
+ /* In the case that there's no gate entry, there's no
+ * guard band ...
+ */
+ if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+ ktime_after(guard, entry->close_time))
+ return NULL;
+
+ /* ... and no budget. */
+ if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+ atomic_sub_return(len, &entry->budget) < 0)
+ return NULL;
+
+ skb = child->ops->dequeue(child);
+ if (unlikely(!skb))
+ return NULL;
+
+ qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+ }
+
+ return NULL;
+}
+
+static bool should_restart_cycle(const struct taprio_sched *q,
+ const struct sched_entry *entry)
+{
+ WARN_ON(!entry);
+
+ return list_is_last(&entry->list, &q->entries);
+}
+
+static enum hrtimer_restart advance_sched(struct hrtimer *timer)
+{
+ struct taprio_sched *q = container_of(timer, struct taprio_sched,
+ advance_timer);
+ struct sched_entry *entry, *next;
+ struct Qdisc *sch = q->root;
+ ktime_t close_time;
+
+ spin_lock(&q->current_entry_lock);
+ entry = rcu_dereference_protected(q->current_entry,
+ lockdep_is_held(&q->current_entry_lock));
+
+ /* This is the case that it's the first time that the schedule
+ * runs, so it only happens once per schedule. The first entry
+ * is pre-calculated during the schedule initialization.
+ */
+ if (unlikely(!entry)) {
+ next = list_first_entry(&q->entries, struct sched_entry,
+ list);
+ close_time = next->close_time;
+ goto first_run;
+ }
+
+ if (should_restart_cycle(q, entry))
+ next = list_first_entry(&q->entries, struct sched_entry,
+ list);
+ else
+ next = list_next_entry(entry, list);
+
+ close_time = ktime_add_ns(entry->close_time, next->interval);
+
+ next->close_time = close_time;
+ atomic_set(&next->budget,
+ (next->interval * 1000) / q->picos_per_byte);
+
+first_run:
+ rcu_assign_pointer(q->current_entry, next);
+ spin_unlock(&q->current_entry_lock);
+
+ hrtimer_set_expires(&q->advance_timer, close_time);
+
+ rcu_read_lock();
+ __netif_schedule(sch);
+ rcu_read_unlock();
+
+ return HRTIMER_RESTART;
+}
+
+static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
+ [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 },
+ [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 },
+ [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
+ [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
+ [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
+ [TCA_TAPRIO_ATTR_PRIOMAP] = {
+ .len = sizeof(struct tc_mqprio_qopt)
+ },
+ [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED },
+ [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
+ [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
+};
+
+static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
+ struct netlink_ext_ack *extack)
+{
+ u32 interval = 0;
+
+ if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
+ entry->command = nla_get_u8(
+ tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
+
+ if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
+ entry->gate_mask = nla_get_u32(
+ tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
+
+ if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
+ interval = nla_get_u32(
+ tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
+
+ if (interval == 0) {
+ NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
+ return -EINVAL;
+ }
+
+ entry->interval = interval;
+
+ return 0;
+}
+
+static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
+ int index, struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
+ int err;
+
+ err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
+ entry_policy, NULL);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+ return -EINVAL;
+ }
+
+ entry->index = index;
+
+ return fill_sched_entry(tb, entry, extack);
+}
+
+/* Returns the number of entries in case of success */
+static int parse_sched_single_entry(struct nlattr *n,
+ struct taprio_sched *q,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
+ struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { };
+ struct sched_entry *entry;
+ bool found = false;
+ u32 index;
+ int err;
+
+ err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX,
+ n, entry_list_policy, NULL);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+ return -EINVAL;
+ }
+
+ if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) {
+ NL_SET_ERR_MSG(extack, "Single-entry must include an entry");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX,
+ tb_list[TCA_TAPRIO_SCHED_ENTRY],
+ entry_policy, NULL);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+ return -EINVAL;
+ }
+
+ if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) {
+ NL_SET_ERR_MSG(extack, "Entry must specify an index\n");
+ return -EINVAL;
+ }
+
+ index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]);
+ if (index >= q->num_entries) {
+ NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule");
+ return -EINVAL;
+ }
+
+ list_for_each_entry(entry, &q->entries, list) {
+ if (entry->index == index) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ NL_SET_ERR_MSG(extack, "Could not find entry");
+ return -ENOENT;
+ }
+
+ err = fill_sched_entry(tb_entry, entry, extack);
+ if (err < 0)
+ return err;
+
+ return q->num_entries;
+}
+
+static int parse_sched_list(struct nlattr *list,
+ struct taprio_sched *q,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *n;
+ int err, rem;
+ int i = 0;
+
+ if (!list)
+ return -EINVAL;
+
+ nla_for_each_nested(n, list, rem) {
+ struct sched_entry *entry;
+
+ if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
+ NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
+ continue;
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+ return -ENOMEM;
+ }
+
+ err = parse_sched_entry(n, entry, i, extack);
+ if (err < 0) {
+ kfree(entry);
+ return err;
+ }
+
+ list_add_tail(&entry->list, &q->entries);
+ i++;
+ }
+
+ q->num_entries = i;
+
+ return i;
+}
+
+/* Returns the number of entries in case of success */
+static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q,
+ struct netlink_ext_ack *extack)
+{
+ int err = 0;
+ int clockid;
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] &&
+ tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
+ return -EINVAL;
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0)
+ return -EINVAL;
+
+ if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID])
+ return -EINVAL;
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
+ q->base_time = nla_get_s64(
+ tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+ clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
+
+ /* We only support static clockids and we don't allow
+ * for it to be modified after the first init.
+ */
+ if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid))
+ return -EINVAL;
+
+ q->clockid = clockid;
+ }
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
+ err = parse_sched_list(
+ tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack);
+ else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
+ err = parse_sched_single_entry(
+ tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack);
+
+ /* parse_sched_* return the number of entries in the schedule,
+ * a schedule with zero entries is an error.
+ */
+ if (err == 0) {
+ NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry");
+ return -EINVAL;
+ }
+
+ return err;
+}
+
+static int taprio_parse_mqprio_opt(struct net_device *dev,
+ struct tc_mqprio_qopt *qopt,
+ struct netlink_ext_ack *extack)
+{
+ int i, j;
+
+ if (!qopt) {
+ NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
+ return -EINVAL;
+ }
+
+ /* Verify num_tc is not out of max range */
+ if (qopt->num_tc > TC_MAX_QUEUE) {
+ NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
+ return -EINVAL;
+ }
+
+ /* taprio imposes that traffic classes map 1:n to tx queues */
+ if (qopt->num_tc > dev->num_tx_queues) {
+ NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
+ return -EINVAL;
+ }
+
+ /* Verify priority mapping uses valid tcs */
+ for (i = 0; i < TC_BITMASK + 1; i++) {
+ if (qopt->prio_tc_map[i] >= qopt->num_tc) {
+ NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
+ return -EINVAL;
+ }
+ }
+
+ for (i = 0; i < qopt->num_tc; i++) {
+ unsigned int last = qopt->offset[i] + qopt->count[i];
+
+ /* Verify the queue count is in tx range being equal to the
+ * real_num_tx_queues indicates the last queue is in use.
+ */
+ if (qopt->offset[i] >= dev->num_tx_queues ||
+ !qopt->count[i] ||
+ last > dev->real_num_tx_queues) {
+ NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
+ return -EINVAL;
+ }
+
+ /* Verify that the offset and counts do not overlap */
+ for (j = i + 1; j < qopt->num_tc; j++) {
+ if (last > qopt->offset[j]) {
+ NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static ktime_t taprio_get_start_time(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct sched_entry *entry;
+ ktime_t now, base, cycle;
+ s64 n;
+
+ base = ns_to_ktime(q->base_time);
+ cycle = 0;
+
+ /* Calculate the cycle_time, by summing all the intervals.
+ */
+ list_for_each_entry(entry, &q->entries, list)
+ cycle = ktime_add_ns(cycle, entry->interval);
+
+ if (!cycle)
+ return base;
+
+ now = q->get_time();
+
+ if (ktime_after(base, now))
+ return base;
+
+ /* Schedule the start time for the beginning of the next
+ * cycle.
+ */
+ n = div64_s64(ktime_sub_ns(now, base), cycle);
+
+ return ktime_add_ns(base, (n + 1) * cycle);
+}
+
+static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct sched_entry *first;
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->current_entry_lock, flags);
+
+ first = list_first_entry(&q->entries, struct sched_entry,
+ list);
+
+ first->close_time = ktime_add_ns(start, first->interval);
+ atomic_set(&first->budget,
+ (first->interval * 1000) / q->picos_per_byte);
+ rcu_assign_pointer(q->current_entry, NULL);
+
+ spin_unlock_irqrestore(&q->current_entry_lock, flags);
+
+ hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
+}
+
+static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_mqprio_qopt *mqprio = NULL;
+ struct ethtool_link_ksettings ecmd;
+ int i, err, size;
+ s64 link_speed;
+ ktime_t start;
+
+ err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
+ taprio_policy, extack);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
+ mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
+
+ err = taprio_parse_mqprio_opt(dev, mqprio, extack);
+ if (err < 0)
+ return err;
+
+ /* A schedule with less than one entry is an error */
+ size = parse_taprio_opt(tb, q, extack);
+ if (size < 0)
+ return size;
+
+ hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
+ q->advance_timer.function = advance_sched;
+
+ switch (q->clockid) {
+ case CLOCK_REALTIME:
+ q->get_time = ktime_get_real;
+ break;
+ case CLOCK_MONOTONIC:
+ q->get_time = ktime_get;
+ break;
+ case CLOCK_BOOTTIME:
+ q->get_time = ktime_get_boottime;
+ break;
+ case CLOCK_TAI:
+ q->get_time = ktime_get_clocktai;
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *dev_queue;
+ struct Qdisc *qdisc;
+
+ dev_queue = netdev_get_tx_queue(dev, i);
+ qdisc = qdisc_create_dflt(dev_queue,
+ &pfifo_qdisc_ops,
+ TC_H_MAKE(TC_H_MAJ(sch->handle),
+ TC_H_MIN(i + 1)),
+ extack);
+ if (!qdisc)
+ return -ENOMEM;
+
+ if (i < dev->real_num_tx_queues)
+ qdisc_hash_add(qdisc, false);
+
+ q->qdiscs[i] = qdisc;
+ }
+
+ if (mqprio) {
+ netdev_set_num_tc(dev, mqprio->num_tc);
+ for (i = 0; i < mqprio->num_tc; i++)
+ netdev_set_tc_queue(dev, i,
+ mqprio->count[i],
+ mqprio->offset[i]);
+
+ /* Always use supplied priority mappings */
+ for (i = 0; i < TC_BITMASK + 1; i++)
+ netdev_set_prio_tc_map(dev, i,
+ mqprio->prio_tc_map[i]);
+ }
+
+ if (!__ethtool_get_link_ksettings(dev, &ecmd))
+ link_speed = ecmd.base.speed;
+ else
+ link_speed = SPEED_1000;
+
+ q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
+ link_speed * 1000 * 1000);
+
+ start = taprio_get_start_time(sch);
+ if (!start)
+ return 0;
+
+ taprio_start_sched(sch, start);
+
+ return 0;
+}
+
+static void taprio_destroy(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sched_entry *entry, *n;
+ unsigned int i;
+
+ hrtimer_cancel(&q->advance_timer);
+
+ if (q->qdiscs) {
+ for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
+ qdisc_put(q->qdiscs[i]);
+
+ kfree(q->qdiscs);
+ }
+ q->qdiscs = NULL;
+
+ netdev_set_num_tc(dev, 0);
+
+ list_for_each_entry_safe(entry, n, &q->entries, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+}
+
+static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+
+ INIT_LIST_HEAD(&q->entries);
+ spin_lock_init(&q->current_entry_lock);
+
+ /* We may overwrite the configuration later */
+ hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
+
+ q->root = sch;
+
+ /* We only support static clockids. Use an invalid value as default
+ * and get the valid one on taprio_change().
+ */
+ q->clockid = -1;
+
+ if (sch->parent != TC_H_ROOT)
+ return -EOPNOTSUPP;
+
+ if (!netif_is_multiqueue(dev))
+ return -EOPNOTSUPP;
+
+ /* pre-allocate qdisc, attachment can't fail */
+ q->qdiscs = kcalloc(dev->num_tx_queues,
+ sizeof(q->qdiscs[0]),
+ GFP_KERNEL);
+
+ if (!q->qdiscs)
+ return -ENOMEM;
+
+ if (!opt)
+ return -EINVAL;
+
+ return taprio_change(sch, opt, extack);
+}
+
+static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
+ unsigned long cl)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned long ntx = cl - 1;
+
+ if (ntx >= dev->num_tx_queues)
+ return NULL;
+
+ return netdev_get_tx_queue(dev, ntx);
+}
+
+static int taprio_graft(struct Qdisc *sch, unsigned long cl,
+ struct Qdisc *new, struct Qdisc **old,
+ struct netlink_ext_ack *extack)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+ if (!dev_queue)
+ return -EINVAL;
+
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+
+ *old = q->qdiscs[cl - 1];
+ q->qdiscs[cl - 1] = new;
+
+ if (new)
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+
+ return 0;
+}
+
+static int dump_entry(struct sk_buff *msg,
+ const struct sched_entry *entry)
+{
+ struct nlattr *item;
+
+ item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY);
+ if (!item)
+ return -ENOSPC;
+
+ if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
+ entry->gate_mask))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
+ entry->interval))
+ goto nla_put_failure;
+
+ return nla_nest_end(msg, item);
+
+nla_put_failure:
+ nla_nest_cancel(msg, item);
+ return -1;
+}
+
+static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_mqprio_qopt opt = { 0 };
+ struct nlattr *nest, *entry_list;
+ struct sched_entry *entry;
+ unsigned int i;
+
+ opt.num_tc = netdev_get_num_tc(dev);
+ memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+
+ for (i = 0; i < netdev_get_num_tc(dev); i++) {
+ opt.count[i] = dev->tc_to_txq[i].count;
+ opt.offset[i] = dev->tc_to_txq[i].offset;
+ }
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (!nest)
+ return -ENOSPC;
+
+ if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
+ goto options_error;
+
+ if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
+ q->base_time, TCA_TAPRIO_PAD))
+ goto options_error;
+
+ if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
+ goto options_error;
+
+ entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
+ if (!entry_list)
+ goto options_error;
+
+ list_for_each_entry(entry, &q->entries, list) {
+ if (dump_entry(skb, entry) < 0)
+ goto options_error;
+ }
+
+ nla_nest_end(skb, entry_list);
+
+ return nla_nest_end(skb, nest);
+
+options_error:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+ struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+ if (!dev_queue)
+ return NULL;
+
+ return dev_queue->qdisc_sleeping;
+}
+
+static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
+{
+ unsigned int ntx = TC_H_MIN(classid);
+
+ if (!taprio_queue_get(sch, ntx))
+ return 0;
+ return ntx;
+}
+
+static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+ tcm->tcm_parent = TC_H_ROOT;
+ tcm->tcm_handle |= TC_H_MIN(cl);
+ tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+
+ return 0;
+}
+
+static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
+ __releases(d->lock)
+ __acquires(d->lock)
+{
+ struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+ sch = dev_queue->qdisc_sleeping;
+ if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
+ return -1;
+ return 0;
+}
+
+static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned long ntx;
+
+ if (arg->stop)
+ return;
+
+ arg->count = arg->skip;
+ for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
+ if (arg->fn(sch, ntx + 1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
+ struct tcmsg *tcm)
+{
+ return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
+}
+
+static const struct Qdisc_class_ops taprio_class_ops = {
+ .graft = taprio_graft,
+ .leaf = taprio_leaf,
+ .find = taprio_find,
+ .walk = taprio_walk,
+ .dump = taprio_dump_class,
+ .dump_stats = taprio_dump_class_stats,
+ .select_queue = taprio_select_queue,
+};
+
+static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
+ .cl_ops = &taprio_class_ops,
+ .id = "taprio",
+ .priv_size = sizeof(struct taprio_sched),
+ .init = taprio_init,
+ .destroy = taprio_destroy,
+ .peek = taprio_peek,
+ .dequeue = taprio_dequeue,
+ .enqueue = taprio_enqueue,
+ .dump = taprio_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init taprio_module_init(void)
+{
+ return register_qdisc(&taprio_qdisc_ops);
+}
+
+static void __exit taprio_module_exit(void)
+{
+ unregister_qdisc(&taprio_qdisc_ops);
+}
+
+module_init(taprio_module_init);
+module_exit(taprio_module_exit);
+MODULE_LICENSE("GPL");