diff options
author | David S. Miller <davem@davemloft.net> | 2018-07-12 09:03:32 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-07-12 09:03:32 +0300 |
commit | e32f55f373217001187ff171e75c5dfbb251f633 (patch) | |
tree | a3c0f9e288cc6b2f19594475de9f07b9e44fca9a /net/core | |
parent | 4929c9428a171145f82f81aae0c3c25ef7d82837 (diff) | |
parent | 8ec56fc3c5ee6f9700adac190e9ce5b8859a58b6 (diff) | |
download | linux-e32f55f373217001187ff171e75c5dfbb251f633.tar.xz |
Merge branch '10GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
Jeff Kirsher says:
====================
L2 Fwd Offload & 10GbE Intel Driver Updates 2018-07-09
This patch series is meant to allow support for the L2 forward offload, aka
MACVLAN offload without the need for using ndo_select_queue.
The existing solution currently requires that we use ndo_select_queue in
the transmit path if we want to associate specific Tx queues with a given
MACVLAN interface. In order to get away from this we need to repurpose the
tc_to_txq array and XPS pointer for the MACVLAN interface and use those as
a means of accessing the queues on the lower device. As a result we cannot
offload a device that is configured as multiqueue, however it doesn't
really make sense to configure a macvlan interfaced as being multiqueue
anyway since it doesn't really have a qdisc of its own in the first place.
The big changes in this set are:
Allow lower device to update tc_to_txq and XPS map of offloaded MACVLAN
Disable XPS for single queue devices
Replace accel_priv with sb_dev in ndo_select_queue
Add sb_dev parameter to fallback function for ndo_select_queue
Consolidated ndo_select_queue functions that appeared to be duplicates
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 157 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 36 |
2 files changed, 167 insertions, 26 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index d13cddcac41f..1c3f0997e857 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2067,11 +2067,13 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; int i; + /* walk through the TCs and see if it falls into any of them */ for (i = 0; i < TC_MAX_QUEUE; i++, tc++) { if ((txq - tc->offset) < tc->count) return i; } + /* didn't find it, just return -1 to indicate no match */ return -1; } @@ -2260,7 +2262,14 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, unsigned int nr_ids; if (dev->num_tc) { + /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; + if (num_tc < 0) + return -EINVAL; + + /* If queue belongs to subordinate dev use its map */ + dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; + tc = netdev_txq_to_tc(dev, index); if (tc < 0) return -EINVAL; @@ -2448,11 +2457,25 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, EXPORT_SYMBOL(netif_set_xps_queue); #endif +static void netdev_unbind_all_sb_channels(struct net_device *dev) +{ + struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues]; + + /* Unbind any subordinate channels */ + while (txq-- != &dev->_tx[0]) { + if (txq->sb_dev) + netdev_unbind_sb_channel(dev, txq->sb_dev); + } +} + void netdev_reset_tc(struct net_device *dev) { #ifdef CONFIG_XPS netif_reset_xps_queues_gt(dev, 0); #endif + netdev_unbind_all_sb_channels(dev); + + /* Reset TC configuration of device */ dev->num_tc = 0; memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); @@ -2481,11 +2504,77 @@ int netdev_set_num_tc(struct net_device *dev, u8 num_tc) #ifdef CONFIG_XPS netif_reset_xps_queues_gt(dev, 0); #endif + netdev_unbind_all_sb_channels(dev); + dev->num_tc = num_tc; return 0; } EXPORT_SYMBOL(netdev_set_num_tc); +void netdev_unbind_sb_channel(struct net_device *dev, + struct net_device *sb_dev) +{ + struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues]; + +#ifdef CONFIG_XPS + netif_reset_xps_queues_gt(sb_dev, 0); +#endif + memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq)); + memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map)); + + while (txq-- != &dev->_tx[0]) { + if (txq->sb_dev == sb_dev) + txq->sb_dev = NULL; + } +} +EXPORT_SYMBOL(netdev_unbind_sb_channel); + +int netdev_bind_sb_channel_queue(struct net_device *dev, + struct net_device *sb_dev, + u8 tc, u16 count, u16 offset) +{ + /* Make certain the sb_dev and dev are already configured */ + if (sb_dev->num_tc >= 0 || tc >= dev->num_tc) + return -EINVAL; + + /* We cannot hand out queues we don't have */ + if ((offset + count) > dev->real_num_tx_queues) + return -EINVAL; + + /* Record the mapping */ + sb_dev->tc_to_txq[tc].count = count; + sb_dev->tc_to_txq[tc].offset = offset; + + /* Provide a way for Tx queue to find the tc_to_txq map or + * XPS map for itself. + */ + while (count--) + netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev; + + return 0; +} +EXPORT_SYMBOL(netdev_bind_sb_channel_queue); + +int netdev_set_sb_channel(struct net_device *dev, u16 channel) +{ + /* Do not use a multiqueue device to represent a subordinate channel */ + if (netif_is_multiqueue(dev)) + return -ENODEV; + + /* We allow channels 1 - 32767 to be used for subordinate channels. + * Channel 0 is meant to be "native" mode and used only to represent + * the main root device. We allow writing 0 to reset the device back + * to normal mode after being used as a subordinate channel. + */ + if (channel > S16_MAX) + return -EINVAL; + + dev->num_tc = -channel; + + return 0; +} +EXPORT_SYMBOL(netdev_set_sb_channel); + /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater than real_num_tx_queues stale skbs on the qdisc must be flushed. @@ -2697,24 +2786,26 @@ EXPORT_SYMBOL(netif_device_attach); * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. */ -static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb) +static u16 skb_tx_hash(const struct net_device *dev, + const struct net_device *sb_dev, + struct sk_buff *skb) { u32 hash; u16 qoffset = 0; u16 qcount = dev->real_num_tx_queues; + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + + qoffset = sb_dev->tc_to_txq[tc].offset; + qcount = sb_dev->tc_to_txq[tc].count; + } + if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); while (unlikely(hash >= qcount)) hash -= qcount; - return hash; - } - - if (dev->num_tc) { - u8 tc = netdev_get_prio_tc_map(dev, skb->priority); - - qoffset = dev->tc_to_txq[tc].offset; - qcount = dev->tc_to_txq[tc].count; + return hash + qoffset; } return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; @@ -3484,7 +3575,8 @@ static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb, } #endif -static int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev, + struct sk_buff *skb) { #ifdef CONFIG_XPS struct xps_dev_maps *dev_maps; @@ -3498,7 +3590,7 @@ static int get_xps_queue(struct net_device *dev, struct sk_buff *skb) if (!static_key_false(&xps_rxqs_needed)) goto get_cpus_map; - dev_maps = rcu_dereference(dev->xps_rxqs_map); + dev_maps = rcu_dereference(sb_dev->xps_rxqs_map); if (dev_maps) { int tci = sk_rx_queue_get(sk); @@ -3509,7 +3601,7 @@ static int get_xps_queue(struct net_device *dev, struct sk_buff *skb) get_cpus_map: if (queue_index < 0) { - dev_maps = rcu_dereference(dev->xps_cpus_map); + dev_maps = rcu_dereference(sb_dev->xps_cpus_map); if (dev_maps) { unsigned int tci = skb->sender_cpu - 1; @@ -3525,17 +3617,36 @@ get_cpus_map: #endif } -static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) +u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback) +{ + return 0; +} +EXPORT_SYMBOL(dev_pick_tx_zero); + +u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback) +{ + return (u16)raw_smp_processor_id() % dev->real_num_tx_queues; +} +EXPORT_SYMBOL(dev_pick_tx_cpu_id); + +static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) { struct sock *sk = skb->sk; int queue_index = sk_tx_queue_get(sk); + sb_dev = sb_dev ? : dev; + if (queue_index < 0 || skb->ooo_okay || queue_index >= dev->real_num_tx_queues) { - int new_index = get_xps_queue(dev, skb); + int new_index = get_xps_queue(dev, sb_dev, skb); if (new_index < 0) - new_index = skb_tx_hash(dev, skb); + new_index = skb_tx_hash(dev, sb_dev, skb); if (queue_index != new_index && sk && sk_fullsock(sk) && @@ -3550,7 +3661,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, - void *accel_priv) + struct net_device *sb_dev) { int queue_index = 0; @@ -3565,10 +3676,10 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb, accel_priv, + queue_index = ops->ndo_select_queue(dev, skb, sb_dev, __netdev_pick_tx); else - queue_index = __netdev_pick_tx(dev, skb); + queue_index = __netdev_pick_tx(dev, skb, sb_dev); queue_index = netdev_cap_txqueue(dev, queue_index); } @@ -3580,7 +3691,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, /** * __dev_queue_xmit - transmit a buffer * @skb: buffer to transmit - * @accel_priv: private data used for L2 forwarding offload + * @sb_dev: suboordinate device used for L2 forwarding offload * * Queue a buffer for transmission to a network device. The caller must * have set the device and priority and built the buffer before calling @@ -3603,7 +3714,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ -static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) +static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) { struct net_device *dev = skb->dev; struct netdev_queue *txq; @@ -3642,7 +3753,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); - txq = netdev_pick_tx(dev, skb, accel_priv); + txq = netdev_pick_tx(dev, skb, sb_dev); q = rcu_dereference_bh(txq->qdisc); trace_net_dev_queue(skb); @@ -3716,9 +3827,9 @@ int dev_queue_xmit(struct sk_buff *skb) } EXPORT_SYMBOL(dev_queue_xmit); -int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) +int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) { - return __dev_queue_xmit(skb, accel_priv); + return __dev_queue_xmit(skb, sb_dev); } EXPORT_SYMBOL(dev_queue_xmit_accel); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f25ac5ff48a6..ffa1d18f2c2c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1047,13 +1047,30 @@ static ssize_t traffic_class_show(struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; - int index = get_netdev_queue_index(queue); - int tc = netdev_txq_to_tc(dev, index); + int index; + int tc; + if (!netif_is_multiqueue(dev)) + return -ENOENT; + + index = get_netdev_queue_index(queue); + + /* If queue belongs to subordinate dev use its TC mapping */ + dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; + + tc = netdev_txq_to_tc(dev, index); if (tc < 0) return -EINVAL; - return sprintf(buf, "%u\n", tc); + /* We can report the traffic class one of two ways: + * Subordinate device traffic classes are reported with the traffic + * class first, and then the subordinate class so for example TC0 on + * subordinate device 2 will be reported as "0-2". If the queue + * belongs to the root device it will be reported with just the + * traffic class, so just "0" for TC 0 for example. + */ + return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) : + sprintf(buf, "%u\n", tc); } #ifdef CONFIG_XPS @@ -1214,10 +1231,20 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, cpumask_var_t mask; unsigned long index; + if (!netif_is_multiqueue(dev)) + return -ENOENT; + index = get_netdev_queue_index(queue); if (dev->num_tc) { + /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; + if (num_tc < 0) + return -EINVAL; + + /* If queue belongs to subordinate dev use its map */ + dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; + tc = netdev_txq_to_tc(dev, index); if (tc < 0) return -EINVAL; @@ -1260,6 +1287,9 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, cpumask_var_t mask; int err; + if (!netif_is_multiqueue(dev)) + return -ENOENT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; |